Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9024,6 +9024,58 @@
   return (!LosesInfo && !APFloatToConvert.isDenormal());
 }
 
+// Use rldimi/rlwimi to construct v16i8/v8i16 vectors. Elements are first
+// packed into four i32 words using shift + or:
+//   i32 = (i8 << 24) | (i8 << 16) | (i8 << 8) | i8
+//   i32 = (i16 << 16) | i16
+// The words are then emitted as a v4i32 BUILD_VECTOR and bitcast back to the
+// requested type, relying on the existing v4i32 patterns for the rest:
+//   i64 = (i32 << 32) | i32
+// And put two i64 together to get a vector.
+// Returns an empty SDValue for any other vector type.
+static SDValue tryMaskInsertVector(SDValue Op, SelectionDAG &DAG,
+                                   bool LittleEndian) {
+  EVT VT = Op.getValueType();
+
+  // There are already patterns for v4i32 and v2i64 construction.
+  if (VT != MVT::v16i8 && VT != MVT::v8i16)
+    return SDValue();
+
+  SDLoc dl(Op);
+  EVT EltVT = VT.getScalarType();
+  unsigned EltBits = VT.getScalarSizeInBits();
+  unsigned EltsPerWord = VT.getVectorNumElements() / 4;
+  SDValue Words[4];
+
+  for (unsigned i = 0; i < 4; ++i) {
+    SDValue Word;
+    for (unsigned j = 0; j < EltsPerWord; ++j) {
+      // Slot j == 0 is the most significant one in the word; on little-endian
+      // targets it holds the highest-numbered element of the group.
+      unsigned Idx =
+          i * EltsPerWord + (LittleEndian ? EltsPerWord - j - 1 : j);
+      SDValue Part = DAG.getZExtOrTrunc(Op.getOperand(Idx), dl, MVT::i32);
+
+      // BUILD_VECTOR operands may be wider than the element type and are only
+      // implicitly truncated, so clear the bits above the element width.
+      // Otherwise they would be OR-ed into the neighbouring elements.
+      Part = DAG.getZeroExtendInReg(Part, dl, EltVT);
+
+      // Left-shift elements to insert, except the last, whose offset is 0.
+      // The amount is an ordinary constant operand, not a target constant.
+      if (unsigned ShAmt = EltBits * (EltsPerWord - j - 1))
+        Part = DAG.getNode(ISD::SHL, dl, MVT::i32, Part,
+                           DAG.getConstant(ShAmt, dl, MVT::i32));
+
+      Word = j == 0 ? Part : DAG.getNode(ISD::OR, dl, MVT::i32, Word, Part);
+    }
+    Words[i] = Word;
+  }
+
+  // Count on v4i32 to get optimized BUILD_VECTOR pattern.
+  return DAG.getBitcast(VT, DAG.getBuildVector(MVT::v4i32, dl, Words));
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it. If we CAN select this case, and if it
 // selects to a single instruction, return Op.
Otherwise, if we can codegen @@ -9133,6 +9176,13 @@ haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), Subtarget.hasP8Vector())) return Op; + + // Try to construct vector using masked insert. + if (!BVN->isConstant() && !DAG.isSplatValue(Op, true)) + if (SDValue Res = + tryMaskInsertVector(Op, DAG, Subtarget.isLittleEndian())) + return Res; + return SDValue(); } Index: llvm/test/CodeGen/PowerPC/pr25080.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr25080.ll +++ llvm/test/CodeGen/PowerPC/pr25080.ll @@ -13,42 +13,40 @@ ; LE-NEXT: xxland 35, 35, 36 ; LE-NEXT: vcmpequw 2, 2, 5 ; LE-NEXT: vcmpequw 3, 3, 5 -; LE-NEXT: xxswapd 0, 34 +; LE-NEXT: xxsldwi 0, 34, 34, 3 +; LE-NEXT: xxswapd 1, 34 +; LE-NEXT: xxsldwi 2, 34, 34, 1 +; LE-NEXT: xxswapd 3, 35 ; LE-NEXT: mfvsrwz 3, 34 -; LE-NEXT: xxsldwi 1, 34, 34, 1 -; LE-NEXT: mfvsrwz 4, 35 -; LE-NEXT: xxsldwi 2, 34, 34, 3 -; LE-NEXT: mtvsrd 36, 3 -; LE-NEXT: mffprwz 3, 0 -; LE-NEXT: xxswapd 0, 35 -; LE-NEXT: mtvsrd 37, 4 -; LE-NEXT: mffprwz 4, 1 -; LE-NEXT: xxsldwi 1, 35, 35, 1 -; LE-NEXT: mtvsrd 34, 3 -; LE-NEXT: mffprwz 3, 2 -; LE-NEXT: mtvsrd 32, 4 -; LE-NEXT: mffprwz 4, 0 +; LE-NEXT: mffprwz 5, 0 ; LE-NEXT: xxsldwi 0, 35, 35, 3 -; LE-NEXT: mtvsrd 33, 3 -; LE-NEXT: mffprwz 3, 1 -; LE-NEXT: mtvsrd 38, 4 -; LE-NEXT: mtvsrd 35, 3 -; LE-NEXT: mffprwz 3, 0 -; LE-NEXT: vmrghh 2, 0, 2 -; LE-NEXT: mtvsrd 32, 3 +; LE-NEXT: mffprwz 6, 1 +; LE-NEXT: xxsldwi 1, 35, 35, 1 +; LE-NEXT: mffprwz 7, 2 +; LE-NEXT: slwi 5, 5, 16 +; LE-NEXT: mffprwz 8, 0 +; LE-NEXT: or 3, 5, 3 +; LE-NEXT: mffprwz 9, 1 +; LE-NEXT: slwi 7, 7, 16 +; LE-NEXT: mfvsrwz 4, 35 +; LE-NEXT: or 5, 7, 6 +; LE-NEXT: slwi 6, 8, 16 +; LE-NEXT: vspltish 3, 15 +; LE-NEXT: mffprwz 10, 3 +; LE-NEXT: slwi 7, 9, 16 +; LE-NEXT: rldimi 5, 3, 32, 0 ; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; LE-NEXT: vmrghh 4, 1, 4 +; LE-NEXT: or 4, 6, 4 +; LE-NEXT: mtfprd 0, 5 ; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; LE-NEXT: vmrghh 3, 3, 
6 -; LE-NEXT: vmrghh 5, 0, 5 -; LE-NEXT: vmrglw 2, 4, 2 -; LE-NEXT: vspltish 4, 15 -; LE-NEXT: vmrglw 3, 5, 3 -; LE-NEXT: xxmrgld 34, 35, 34 -; LE-NEXT: lvx 3, 0, 3 -; LE-NEXT: xxlor 34, 34, 35 -; LE-NEXT: vslh 2, 2, 4 -; LE-NEXT: vsrah 2, 2, 4 +; LE-NEXT: or 6, 7, 10 +; LE-NEXT: lvx 2, 0, 3 +; LE-NEXT: rldimi 6, 4, 32, 0 +; LE-NEXT: mtfprd 1, 6 +; LE-NEXT: xxmrghd 0, 1, 0 +; LE-NEXT: xxlor 34, 0, 34 +; LE-NEXT: vslh 2, 2, 3 +; LE-NEXT: vsrah 2, 2, 3 ; LE-NEXT: blr ; ; BE-LABEL: pr25080: @@ -61,45 +59,40 @@ ; BE-NEXT: xxland 34, 34, 0 ; BE-NEXT: vcmpequw 3, 3, 4 ; BE-NEXT: vcmpequw 2, 2, 4 -; BE-NEXT: xxswapd 0, 35 +; BE-NEXT: xxsldwi 0, 35, 35, 3 +; BE-NEXT: xxswapd 1, 35 +; BE-NEXT: xxsldwi 2, 35, 35, 1 +; BE-NEXT: xxswapd 3, 34 ; BE-NEXT: mfvsrwz 3, 35 -; BE-NEXT: xxsldwi 1, 35, 35, 1 -; BE-NEXT: mfvsrwz 4, 34 -; BE-NEXT: mtvsrwz 36, 3 -; BE-NEXT: xxsldwi 2, 35, 35, 3 -; BE-NEXT: mffprwz 3, 0 -; BE-NEXT: xxswapd 0, 34 -; BE-NEXT: mtvsrwz 35, 4 -; BE-NEXT: mffprwz 4, 1 +; BE-NEXT: mffprwz 5, 0 +; BE-NEXT: xxsldwi 0, 34, 34, 3 +; BE-NEXT: mffprwz 6, 1 ; BE-NEXT: xxsldwi 1, 34, 34, 1 -; BE-NEXT: mtvsrwz 37, 3 +; BE-NEXT: mffprwz 7, 2 +; BE-NEXT: slwi 5, 5, 16 +; BE-NEXT: mffprwz 8, 0 +; BE-NEXT: or 3, 5, 3 +; BE-NEXT: mffprwz 9, 1 +; BE-NEXT: slwi 7, 7, 16 +; BE-NEXT: mfvsrwz 4, 34 +; BE-NEXT: or 5, 7, 6 +; BE-NEXT: slwi 6, 8, 16 +; BE-NEXT: vspltish 2, 15 +; BE-NEXT: mffprwz 10, 3 +; BE-NEXT: slwi 7, 9, 16 +; BE-NEXT: rldimi 5, 3, 32, 0 ; BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; BE-NEXT: or 4, 6, 4 +; BE-NEXT: mtfprd 0, 5 ; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; BE-NEXT: mtvsrwz 32, 4 -; BE-NEXT: mffprwz 4, 0 -; BE-NEXT: lxvw4x 33, 0, 3 -; BE-NEXT: xxsldwi 0, 34, 34, 3 -; BE-NEXT: mffprwz 3, 1 -; BE-NEXT: mffprwz 5, 2 -; BE-NEXT: vperm 2, 0, 5, 1 -; BE-NEXT: mtvsrwz 37, 3 -; BE-NEXT: mffprwz 3, 0 -; BE-NEXT: mtvsrwz 38, 5 -; BE-NEXT: mtvsrwz 39, 4 -; BE-NEXT: mtvsrwz 32, 3 -; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha -; BE-NEXT: vperm 4, 6, 4, 1 -; BE-NEXT: addi 3, 3, 
.LCPI0_2@toc@l -; BE-NEXT: vperm 5, 5, 7, 1 -; BE-NEXT: lxvw4x 0, 0, 3 -; BE-NEXT: vperm 3, 0, 3, 1 -; BE-NEXT: vmrghw 2, 4, 2 -; BE-NEXT: vmrghw 3, 3, 5 -; BE-NEXT: xxmrghd 34, 35, 34 -; BE-NEXT: vspltish 3, 15 -; BE-NEXT: xxlor 34, 34, 0 -; BE-NEXT: vslh 2, 2, 3 -; BE-NEXT: vsrah 2, 2, 3 +; BE-NEXT: or 6, 7, 10 +; BE-NEXT: rldimi 6, 4, 32, 0 +; BE-NEXT: mtfprd 1, 6 +; BE-NEXT: xxmrghd 0, 1, 0 +; BE-NEXT: lxvw4x 1, 0, 3 +; BE-NEXT: xxlor 35, 0, 1 +; BE-NEXT: vslh 3, 3, 2 +; BE-NEXT: vsrah 2, 3, 2 ; BE-NEXT: blr entry: %0 = trunc <8 x i32> %a to <8 x i23> Index: llvm/test/CodeGen/PowerPC/pre-inc-disable.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -341,53 +341,12 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sldi r4, r4, 1 -; CHECK-NEXT: li r7, 16 -; CHECK-NEXT: add r6, r3, r4 -; CHECK-NEXT: lxsihzx v4, r3, r4 -; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NEXT: lxsihzx v2, r6, r7 -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-NEXT: mtvsrd v3, r6 -; CHECK-NEXT: vmrghh v4, v3, v4 -; CHECK-NEXT: vmrghh v2, v3, v2 -; CHECK-NEXT: vsplth v3, v3, 3 -; CHECK-NEXT: vmrglw v3, v4, v3 -; CHECK-NEXT: lxvx v4, 0, r3 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vperm v2, v2, v3, v4 -; CHECK-NEXT: xxspltw v3, v2, 2 -; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: vextuwrx r3, r3, v2 ; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then ; ; P9BE-LABEL: test16: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: sldi r4, r4, 1 -; P9BE-NEXT: li r7, 16 -; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: lxsihzx v5, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI3_1@toc@ha -; P9BE-NEXT: lxsihzx v2, r6, r7 -; P9BE-NEXT: addis r6, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI3_1@toc@l -; P9BE-NEXT: addi r6, 
r6, .LCPI3_0@toc@l -; P9BE-NEXT: lxvx v3, 0, r6 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vperm v2, v4, v2, v3 -; P9BE-NEXT: vperm v3, v4, v5, v3 -; P9BE-NEXT: vsplth v4, v4, 3 -; P9BE-NEXT: vmrghw v3, v4, v3 -; P9BE-NEXT: lxvx v4, 0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vperm v2, v3, v2, v4 -; P9BE-NEXT: xxspltw v3, v2, 1 -; P9BE-NEXT: vadduwm v2, v2, v3 -; P9BE-NEXT: vextuwlx r3, r3, v2 ; P9BE-NEXT: cmpw r3, r5 ; P9BE-NEXT: bgelr+ cr0 ; P9BE-NEXT: # %bb.1: # %if.then @@ -426,27 +385,6 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: add r6, r3, r4 -; CHECK-NEXT: lxsibzx v2, r3, r4 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtvsrd v3, r3 -; CHECK-NEXT: li r3, 8 -; CHECK-NEXT: lxsibzx v5, r6, r3 -; CHECK-NEXT: vspltb v4, v3, 7 -; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NEXT: vmrghb v2, v3, v2 -; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-NEXT: vmrglh v2, v2, v4 -; CHECK-NEXT: vmrghb v3, v3, v5 -; CHECK-NEXT: vmrglw v2, v2, v4 -; CHECK-NEXT: vmrglh v3, v3, v4 -; CHECK-NEXT: vmrglw v3, v4, v3 -; CHECK-NEXT: lxvx v4, 0, r3 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vperm v2, v3, v2, v4 -; CHECK-NEXT: xxspltw v3, v2, 2 -; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: vextuwrx r3, r3, v2 ; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then @@ -454,25 +392,12 @@ ; P9BE-LABEL: test8: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: li r7, 8 -; P9BE-NEXT: lxsibzx v5, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI4_1@toc@ha -; P9BE-NEXT: lxsibzx v2, r6, r7 -; P9BE-NEXT: addis r6, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI4_1@toc@l -; P9BE-NEXT: addi r6, r6, .LCPI4_0@toc@l -; P9BE-NEXT: lxvx v3, 0, r6 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vperm v2, v4, v2, v3 -; P9BE-NEXT: vperm v3, v4, v5, v3 -; P9BE-NEXT: vspltb v4, v4, 7 -; P9BE-NEXT: vmrghh v3, 
v3, v4 -; P9BE-NEXT: xxspltw v4, v4, 0 -; P9BE-NEXT: vmrghw v2, v3, v2 -; P9BE-NEXT: lxvx v3, 0, r3 +; P9BE-NEXT: lbzx r3, r3, r4 +; P9BE-NEXT: lbz r4, 8(r6) +; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vperm v2, v4, v2, v3 +; P9BE-NEXT: mtvsrwz v3, r4 +; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: vextuwlx r3, r3, v2 Index: llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -13,8 +13,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: lis r6, 31710 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: ori r6, r6, 63421 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -23,25 +25,25 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 31710 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: ori r4, r4, 63421 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: sub r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -124 -; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r6, r5, r6 +; P9LE-NEXT: sub r5, r6, r5 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: lis r6, -16728 +; P9LE-NEXT: mulli r5, r5, -124 +; P9LE-NEXT: ori r6, r6, 63249 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 ; P9LE-NEXT: lis r4, 21399 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: ori r4, r4, 33437 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: 
vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 @@ -49,29 +51,29 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -16728 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 63249 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 8 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -1003 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 8 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, -1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: lis r4, 31710 +; P9BE-NEXT: lis r6, -21386 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 63421 +; P9BE-NEXT: ori r6, r6, 37253 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: sub r4, r4, r3 @@ -80,162 +82,154 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, -124 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: mulhw r6, r5, r6 +; P9BE-NEXT: add r5, 
r6, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: lis r6, 21399 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: ori r6, r6, 33437 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, -16728 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; P9BE-NEXT: ori r4, r4, 63249 -; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, -1003 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 5 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: mulhw r5, r5, r6 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srwi r5, r5, 5 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_1: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 21399 -; P8LE-NEXT: lis r8, -16728 -; P8LE-NEXT: lis r9, -21386 -; P8LE-NEXT: lis r10, 31710 -; P8LE-NEXT: ori r3, r3, 33437 -; P8LE-NEXT: ori r8, r8, 63249 -; P8LE-NEXT: ori r9, r9, 37253 -; P8LE-NEXT: ori r10, r10, 63421 +; P8LE-NEXT: lis r3, 
-16728 +; P8LE-NEXT: lis r9, 31710 +; P8LE-NEXT: lis r8, 21399 +; P8LE-NEXT: lis r11, -21386 +; P8LE-NEXT: ori r3, r3, 63249 +; P8LE-NEXT: ori r9, r9, 63421 +; P8LE-NEXT: ori r8, r8, 33437 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 32, 48 ; P8LE-NEXT: rldicl r6, r4, 16, 48 ; P8LE-NEXT: clrldi r7, r4, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r6, r6 ; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: extsh r10, r6 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r12, r4 +; P8LE-NEXT: mulhw r3, r10, r3 +; P8LE-NEXT: ori r10, r11, 37253 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: mulhw r3, r5, r3 -; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: mulhw r8, r6, r8 -; P8LE-NEXT: mulhw r9, r7, r9 -; P8LE-NEXT: mulhw r10, r4, r10 +; P8LE-NEXT: mulhw r9, r12, r9 +; P8LE-NEXT: mulhw r8, r5, r8 +; P8LE-NEXT: mulhw r10, r7, r10 ; P8LE-NEXT: srwi r11, r3, 31 -; P8LE-NEXT: srawi r3, r3, 5 +; P8LE-NEXT: srwi r3, r3, 8 +; P8LE-NEXT: sub r9, r9, r12 +; P8LE-NEXT: srawi r12, r8, 5 ; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: add r9, r9, r7 -; P8LE-NEXT: srawi r8, r8, 8 -; P8LE-NEXT: sub r10, r10, r4 -; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: srwi r8, r8, 31 +; P8LE-NEXT: add r10, r10, r7 ; P8LE-NEXT: srwi r11, r9, 31 -; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: mulli r3, r3, 98 +; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: add r8, r12, r8 +; P8LE-NEXT: mulli r3, r3, -1003 ; P8LE-NEXT: add r9, r9, r11 ; P8LE-NEXT: srwi r11, r10, 31 ; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: mulli r8, r8, -1003 +; P8LE-NEXT: mulli r8, r8, 98 ; P8LE-NEXT: add r10, r10, r11 -; P8LE-NEXT: mulli r9, r9, 95 -; P8LE-NEXT: mulli r10, r10, -124 -; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: mtvsrd v2, r3 -; P8LE-NEXT: sub r5, r6, r8 -; P8LE-NEXT: sub r3, r7, r9 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: 
mulli r9, r9, -124 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: sub r5, r5, r8 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: or r3, r3, r5 +; P8LE-NEXT: sub r5, r7, r10 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: mtvsrwz v2, r3 +; P8LE-NEXT: or r3, r4, r5 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_srem_vec_1: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, -16728 -; P8BE-NEXT: lis r8, 21399 -; P8BE-NEXT: lis r9, 31710 -; P8BE-NEXT: lis r10, -21386 -; P8BE-NEXT: ori r3, r3, 63249 -; P8BE-NEXT: ori r8, r8, 33437 -; P8BE-NEXT: ori r9, r9, 63421 -; P8BE-NEXT: ori r10, r10, 37253 +; P8BE-NEXT: lis r3, 21399 +; P8BE-NEXT: lis r9, -21386 +; P8BE-NEXT: lis r8, -16728 +; P8BE-NEXT: lis r11, 31710 +; P8BE-NEXT: ori r3, r3, 33437 +; P8BE-NEXT: ori r9, r9, 37253 +; P8BE-NEXT: ori r8, r8, 63249 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r6, r6 ; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r10, r6 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r12, r4 +; P8BE-NEXT: mulhw r3, r10, r3 +; P8BE-NEXT: ori r10, r11, 63421 ; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r8, r6, r8 -; P8BE-NEXT: mulhw r9, r7, r9 -; P8BE-NEXT: mulhw r10, r4, r10 +; P8BE-NEXT: mulhw r9, r12, r9 +; P8BE-NEXT: mulhw r8, r5, r8 +; P8BE-NEXT: mulhw r10, r7, r10 ; P8BE-NEXT: srwi r11, r3, 31 -; P8BE-NEXT: srawi r3, r3, 8 +; P8BE-NEXT: srwi r3, r3, 5 +; P8BE-NEXT: add r9, r9, r12 +; P8BE-NEXT: srawi r12, r8, 8 ; P8BE-NEXT: add r3, r3, r11 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: sub r9, r9, r7 -; P8BE-NEXT: srawi r8, r8, 5 -; P8BE-NEXT: add r10, r10, r4 -; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: sub r10, r10, r7 ; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: srawi r9, r9, 6 -; 
P8BE-NEXT: mulli r3, r3, -1003 +; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: add r8, r12, r8 +; P8BE-NEXT: mulli r3, r3, 98 ; P8BE-NEXT: add r9, r9, r11 ; P8BE-NEXT: srwi r11, r10, 31 ; P8BE-NEXT: srawi r10, r10, 6 -; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: mulli r8, r8, -1003 ; P8BE-NEXT: add r10, r10, r11 -; P8BE-NEXT: mulli r9, r9, -124 -; P8BE-NEXT: mulli r10, r10, 95 -; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: mulli r9, r9, 95 +; P8BE-NEXT: mulli r10, r10, -124 +; P8BE-NEXT: sub r3, r6, r3 +; P8BE-NEXT: sub r5, r5, r8 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r4, r4, r9 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r5, r7, r10 +; P8BE-NEXT: slwi r4, r4, 16 ; P8BE-NEXT: mtvsrwz v2, r3 -; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: lxvw4x v3, 0, r3 -; P8BE-NEXT: sub r3, r7, r9 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: or r3, r4, r5 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -256,21 +250,21 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r5, r3, r4 -; P9LE-NEXT: add r5, r5, r3 -; P9LE-NEXT: srwi r6, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r6 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r6, r5 +; P9LE-NEXT: mulhw r7, r6, r4 +; P9LE-NEXT: add r6, r7, r6 +; P9LE-NEXT: srwi r7, r6, 31 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; 
P9LE-NEXT: slwi r5, r5, 16 +; P9LE-NEXT: or r3, r5, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 ; P9LE-NEXT: add r5, r5, r3 @@ -279,20 +273,20 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r6, r5 +; P9LE-NEXT: mulhw r4, r6, r4 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_2: @@ -309,25 +303,22 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r6, r5 +; P9BE-NEXT: mulhw r7, r6, r4 +; P9BE-NEXT: add r6, r7, r6 +; P9BE-NEXT: srwi r7, r6, 31 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: slwi r5, r5, 16 +; P9BE-NEXT: or r3, r5, r3 ; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r4 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r6, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r6 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: 
mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 @@ -335,20 +326,20 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 -; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r6, r5 +; P9BE-NEXT: mulhw r4, r6, r4 +; P9BE-NEXT: add r4, r4, r6 +; P9BE-NEXT: srwi r6, r4, 31 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_2: @@ -357,49 +348,49 @@ ; P8LE-NEXT: lis r3, -21386 ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: mulhw r8, r5, r3 ; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r10, r4 +; P8LE-NEXT: mulhw r11, r8, r3 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: mulhw r9, r6, r3 -; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: mulhw r10, r7, r3 -; P8LE-NEXT: mulhw r3, r4, r3 -; P8LE-NEXT: add r8, r8, r5 -; P8LE-NEXT: add r9, r9, r6 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: srawi r8, r8, 6 
-; P8LE-NEXT: add r10, r10, r7 -; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: srwi r11, r9, 31 -; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: mulli r8, r8, 95 -; P8LE-NEXT: add r9, r9, r11 -; P8LE-NEXT: srwi r11, r10, 31 -; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: mulli r9, r9, 95 -; P8LE-NEXT: add r10, r10, r11 +; P8LE-NEXT: mulhw r9, r5, r3 +; P8LE-NEXT: mulhw r12, r10, r3 +; P8LE-NEXT: mulhw r3, r7, r3 +; P8LE-NEXT: add r8, r11, r8 +; P8LE-NEXT: add r9, r9, r5 +; P8LE-NEXT: add r10, r12, r10 +; P8LE-NEXT: srwi r12, r8, 31 +; P8LE-NEXT: srwi r8, r8, 6 +; P8LE-NEXT: srawi r11, r9, 6 +; P8LE-NEXT: add r3, r3, r7 +; P8LE-NEXT: srwi r9, r9, 31 +; P8LE-NEXT: add r8, r8, r12 +; P8LE-NEXT: srwi r12, r10, 31 +; P8LE-NEXT: srwi r10, r10, 6 +; P8LE-NEXT: add r9, r11, r9 +; P8LE-NEXT: add r10, r10, r12 ; P8LE-NEXT: srwi r11, r3, 31 ; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: mulli r10, r10, 95 -; P8LE-NEXT: sub r5, r5, r8 +; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: mtvsrd v2, r5 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: sub r5, r7, r10 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: sub r4, r4, r10 +; P8LE-NEXT: slwi r6, r6, 16 +; P8LE-NEXT: sub r5, r5, r9 +; P8LE-NEXT: sub r3, r7, r3 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: or r5, r6, r5 +; P8LE-NEXT: or r3, r4, r3 +; P8LE-NEXT: mtvsrwz v2, r5 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_srem_vec_2: @@ -407,52 +398,49 @@ ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, -21386 ; P8BE-NEXT: ori r3, r3, 37253 -; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: clrldi 
r5, r4, 48 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: mulhw r8, r5, r3 ; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r8, r6 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r10, r4 +; P8BE-NEXT: mulhw r11, r8, r3 ; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r9, r6, r3 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r10, r7, r3 -; P8BE-NEXT: mulhw r3, r4, r3 -; P8BE-NEXT: add r8, r8, r5 -; P8BE-NEXT: add r9, r9, r6 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: srawi r8, r8, 6 -; P8BE-NEXT: add r10, r10, r7 -; P8BE-NEXT: add r3, r3, r4 -; P8BE-NEXT: add r8, r8, r11 -; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: srawi r9, r9, 6 -; P8BE-NEXT: mulli r8, r8, 95 -; P8BE-NEXT: add r9, r9, r11 -; P8BE-NEXT: srwi r11, r10, 31 -; P8BE-NEXT: srawi r10, r10, 6 -; P8BE-NEXT: mulli r9, r9, 95 -; P8BE-NEXT: add r10, r10, r11 +; P8BE-NEXT: mulhw r9, r5, r3 +; P8BE-NEXT: mulhw r12, r10, r3 +; P8BE-NEXT: mulhw r3, r7, r3 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: add r9, r9, r5 +; P8BE-NEXT: add r10, r12, r10 +; P8BE-NEXT: srwi r12, r8, 31 +; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: srawi r11, r9, 6 +; P8BE-NEXT: add r3, r3, r7 +; P8BE-NEXT: srwi r9, r9, 31 +; P8BE-NEXT: add r8, r8, r12 +; P8BE-NEXT: srwi r12, r10, 31 +; P8BE-NEXT: srwi r10, r10, 6 +; P8BE-NEXT: add r9, r11, r9 +; P8BE-NEXT: add r10, r10, r12 ; P8BE-NEXT: srwi r11, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 -; P8BE-NEXT: mulli r10, r10, 95 -; P8BE-NEXT: sub r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha +; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: add r3, r3, r11 -; P8BE-NEXT: mtvsrwz v2, r5 -; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l +; P8BE-NEXT: mulli r10, r10, 95 +; P8BE-NEXT: mulli r9, r9, 95 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: sub r5, r7, r10 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm 
v3, v0, v5, v3 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: sub r4, r4, r10 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r5, r5, r9 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r5, r6, r5 +; P8BE-NEXT: or r3, r4, r3 +; P8BE-NEXT: mtvsrwz v2, r5 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -463,68 +451,68 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-LABEL: combine_srem_sdiv: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, -21386 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 37253 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r5, r3, r4 -; P9LE-NEXT: add r5, r5, r3 -; P9LE-NEXT: srwi r6, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r6 -; P9LE-NEXT: mulli r6, r5, 95 -; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r6, r3 -; P9LE-NEXT: mulhw r7, r6, r4 -; P9LE-NEXT: add r6, r7, r6 -; P9LE-NEXT: srwi r7, r6, 31 -; P9LE-NEXT: srawi r6, r6, 6 -; P9LE-NEXT: add r6, r6, r7 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r7, r3 -; P9LE-NEXT: mulhw r8, r7, r4 -; P9LE-NEXT: add r7, r8, r7 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r6, r4, r5 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: sub r3, r3, r6 +; P9LE-NEXT: li r6, 0 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r6, r6 +; P9LE-NEXT: mulhw r7, r6, r5 +; P9LE-NEXT: add r7, r7, r6 ; P9LE-NEXT: srwi r8, r7, 31 ; P9LE-NEXT: srawi r7, r7, 6 ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: 
mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: or r3, r3, r6 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r8, r3 -; P9LE-NEXT: mulhw r4, r8, r4 -; P9LE-NEXT: add r4, r4, r8 -; P9LE-NEXT: srwi r8, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r8 -; P9LE-NEXT: mulli r8, r4, 95 -; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: extsh r6, r3 +; P9LE-NEXT: mulhw r8, r6, r5 +; P9LE-NEXT: add r6, r8, r6 +; P9LE-NEXT: srwi r8, r6, 31 +; P9LE-NEXT: srawi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r8 +; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 4 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r9, r8 +; P9LE-NEXT: mulhw r5, r9, r5 +; P9LE-NEXT: add r5, r5, r9 +; P9LE-NEXT: srwi r9, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r9 +; P9LE-NEXT: mulli r9, r5, 95 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: or r3, r3, r8 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: slwi r3, r4, 16 +; P9LE-NEXT: or r3, r3, r7 +; P9LE-NEXT: vmrghw v2, v2, v3 +; P9LE-NEXT: mtvsrwz v3, r3 +; P9LE-NEXT: slwi r3, r6, 16 +; P9LE-NEXT: or r3, r3, r5 +; P9LE-NEXT: mtvsrwz v4, r3 +; P9LE-NEXT: vmrghw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: combine_srem_sdiv: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: lis r5, -21386 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 37253 @@ -536,53 +524,50 @@ ; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx 
r3, r3, v2 -; P9BE-NEXT: extsh r6, r3 -; P9BE-NEXT: mulhw r7, r6, r5 -; P9BE-NEXT: add r6, r7, r6 -; P9BE-NEXT: srwi r7, r6, 31 -; P9BE-NEXT: srawi r6, r6, 6 -; P9BE-NEXT: add r6, r6, r7 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r7, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: li r6, 6 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r7, r6 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 ; P9BE-NEXT: srawi r7, r7, 6 ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: mulli r8, r7, 95 -; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: or r3, r3, r6 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r5 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r8, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r8 -; P9BE-NEXT: mulli r8, r5, 95 -; P9BE-NEXT: mtvsrwz v0, r5 +; P9BE-NEXT: extsh r6, r3 +; P9BE-NEXT: mulhw r8, r6, r5 +; P9BE-NEXT: add r6, r8, r6 +; P9BE-NEXT: srwi r8, r6, 31 +; P9BE-NEXT: srawi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r8 +; P9BE-NEXT: mulli r8, r6, 95 ; P9BE-NEXT: sub r3, r3, r8 +; P9BE-NEXT: li r8, 2 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r9, r8 +; P9BE-NEXT: mulhw r5, r9, r5 +; P9BE-NEXT: add r5, r5, r9 +; P9BE-NEXT: srwi r9, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r9 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: or r3, r3, r8 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrwz v3, r4 -; P9BE-NEXT: vperm v3, v4, 
v3, v5 -; P9BE-NEXT: mtvsrwz v4, r7 -; P9BE-NEXT: vperm v4, v0, v4, v5 -; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: slwi r3, r4, 16 +; P9BE-NEXT: or r3, r3, r7 +; P9BE-NEXT: vmrgow v2, v2, v3 +; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: slwi r3, r6, 16 +; P9BE-NEXT: or r3, r3, r5 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vmrgow v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -590,58 +575,60 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, -21386 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: extsh r8, r5 +; P8LE-NEXT: extsh r6, r6 +; P8LE-NEXT: rldicl r4, r4, 32, 48 ; P8LE-NEXT: extsh r9, r7 -; P8LE-NEXT: mulhw r10, r5, r3 -; P8LE-NEXT: mulhw r11, r8, r3 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: mulhw r12, r9, r3 -; P8LE-NEXT: extsh r0, r4 -; P8LE-NEXT: mulhw r3, r0, r3 -; P8LE-NEXT: add r10, r10, r5 -; P8LE-NEXT: add r8, r11, r8 -; P8LE-NEXT: srwi r11, r10, 31 -; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: srawi r10, r10, 6 +; P8LE-NEXT: mulhw r10, r8, r3 +; P8LE-NEXT: extsh r11, r4 +; P8LE-NEXT: mulhw r12, r6, r3 +; P8LE-NEXT: mulhw r0, r9, r3 +; P8LE-NEXT: mulhw r3, r11, r3 +; P8LE-NEXT: add r8, r10, r8 +; P8LE-NEXT: add r10, r12, r6 +; P8LE-NEXT: add r9, r0, r9 ; P8LE-NEXT: srawi r12, r8, 6 ; P8LE-NEXT: srwi r8, r8, 31 -; P8LE-NEXT: add r10, r10, r11 -; P8LE-NEXT: add r3, r3, r0 -; P8LE-NEXT: srawi r11, r9, 6 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: srawi r11, r10, 6 +; P8LE-NEXT: srawi r0, r9, 6 +; P8LE-NEXT: srwi r10, r10, 31 ; P8LE-NEXT: srwi r9, r9, 31 ; P8LE-NEXT: add r8, r12, r8 -; P8LE-NEXT: mtvsrd v2, r10 +; P8LE-NEXT: srawi r30, r3, 6 +; P8LE-NEXT: srwi r3, r3, 31 +; 
P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: mulli r11, r8, 95 +; P8LE-NEXT: add r3, r30, r3 +; P8LE-NEXT: slwi r8, r8, 16 ; P8LE-NEXT: mulli r12, r10, 95 -; P8LE-NEXT: add r9, r11, r9 -; P8LE-NEXT: srwi r11, r3, 31 -; P8LE-NEXT: mtvsrd v3, r8 -; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: mulli r10, r8, 95 -; P8LE-NEXT: mtvsrd v4, r9 -; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: mulli r8, r9, 95 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: mulli r9, r3, 95 -; P8LE-NEXT: sub r5, r5, r12 -; P8LE-NEXT: sub r6, r6, r10 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: mtvsrd v5, r6 -; P8LE-NEXT: sub r5, r7, r8 -; P8LE-NEXT: sub r4, r4, r9 -; P8LE-NEXT: mtvsrd v0, r5 -; P8LE-NEXT: mtvsrd v1, r4 -; P8LE-NEXT: vmrghh v3, v5, v3 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v0, v1, v0 -; P8LE-NEXT: vmrghh v4, v5, v4 -; P8LE-NEXT: vmrglw v3, v0, v3 -; P8LE-NEXT: vmrglw v2, v4, v2 +; P8LE-NEXT: mulli r0, r9, 95 +; P8LE-NEXT: or r8, r8, r10 +; P8LE-NEXT: mulli r30, r3, 95 +; P8LE-NEXT: mtvsrwz v2, r8 +; P8LE-NEXT: sub r5, r5, r11 +; P8LE-NEXT: sub r6, r6, r12 +; P8LE-NEXT: sub r7, r7, r0 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r4, r30 +; P8LE-NEXT: slwi r7, r7, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: slwi r6, r9, 16 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: or r4, r7, r4 +; P8LE-NEXT: or r3, r6, r3 +; P8LE-NEXT: mtvsrwz v3, r5 +; P8LE-NEXT: mtvsrwz v4, r4 +; P8LE-NEXT: mtvsrwz v5, r3 +; P8LE-NEXT: vmrghw v3, v4, v3 +; P8LE-NEXT: vmrghw v2, v5, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -650,62 +637,59 @@ ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, -21386 ; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8BE-NEXT: addis r30, r2, .LCPI2_0@toc@ha ; P8BE-NEXT: ori r3, r3, 37253 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r6, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: 
extsh r8, r5 ; P8BE-NEXT: extsh r9, r6 +; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: extsh r10, r7 ; P8BE-NEXT: mulhw r11, r8, r3 -; P8BE-NEXT: mulhw r12, r9, r3 -; P8BE-NEXT: rldicl r4, r4, 16, 48 -; P8BE-NEXT: mulhw r0, r10, r3 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r3, r4, r3 +; P8BE-NEXT: extsh r12, r4 +; P8BE-NEXT: mulhw r0, r9, r3 +; P8BE-NEXT: mulhw r30, r10, r3 +; P8BE-NEXT: mulhw r3, r12, r3 ; P8BE-NEXT: add r8, r11, r8 -; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: srawi r8, r8, 6 -; P8BE-NEXT: addi r0, r30, .LCPI2_0@toc@l -; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: add r9, r0, r9 +; P8BE-NEXT: add r10, r30, r10 +; P8BE-NEXT: srawi r11, r8, 6 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: add r3, r3, r12 ; P8BE-NEXT: srawi r12, r9, 6 +; P8BE-NEXT: srawi r0, r10, 6 ; P8BE-NEXT: srwi r9, r9, 31 -; P8BE-NEXT: add r8, r8, r11 -; P8BE-NEXT: add r3, r3, r4 -; P8BE-NEXT: lxvw4x v2, 0, r0 -; P8BE-NEXT: srawi r11, r10, 6 ; P8BE-NEXT: srwi r10, r10, 31 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: srawi r30, r3, 6 +; P8BE-NEXT: srwi r3, r3, 31 ; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: mtvsrwz v3, r8 -; P8BE-NEXT: mulli r12, r8, 95 -; P8BE-NEXT: add r10, r11, r10 -; P8BE-NEXT: srwi r11, r3, 31 -; P8BE-NEXT: mtvsrwz v4, r9 -; P8BE-NEXT: srawi r3, r3, 6 -; P8BE-NEXT: mulli r8, r9, 95 -; P8BE-NEXT: mtvsrwz v5, r10 -; P8BE-NEXT: add r3, r3, r11 -; P8BE-NEXT: mulli r9, r10, 95 -; P8BE-NEXT: vperm v3, v4, v3, v2 -; P8BE-NEXT: mulli r10, r3, 95 -; P8BE-NEXT: sub r5, r5, r12 -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: mtvsrwz v4, r5 -; P8BE-NEXT: mtvsrwz v0, r6 -; P8BE-NEXT: sub r5, r7, r9 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v1, r5 -; P8BE-NEXT: mtvsrwz v6, r4 -; P8BE-NEXT: vperm v4, v0, v4, v2 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v1, v6, v1, v2 -; P8BE-NEXT: vperm v2, v0, v5, v2 -; P8BE-NEXT: vmrghw v4, v1, v4 -; P8BE-NEXT: vmrghw v2, v2, v3 -; P8BE-NEXT: 
vadduhm v2, v4, v2 +; P8BE-NEXT: add r10, r0, r10 +; P8BE-NEXT: mulli r11, r8, 95 +; P8BE-NEXT: add r3, r30, r3 +; P8BE-NEXT: slwi r8, r8, 16 +; P8BE-NEXT: mulli r12, r9, 95 +; P8BE-NEXT: mulli r0, r10, 95 +; P8BE-NEXT: or r8, r8, r9 +; P8BE-NEXT: mulli r30, r3, 95 +; P8BE-NEXT: mtvsrwz v2, r8 +; P8BE-NEXT: sub r5, r5, r11 +; P8BE-NEXT: sub r6, r6, r12 +; P8BE-NEXT: sub r7, r7, r0 +; P8BE-NEXT: slwi r5, r5, 16 +; P8BE-NEXT: sub r4, r4, r30 +; P8BE-NEXT: slwi r7, r7, 16 +; P8BE-NEXT: or r5, r5, r6 +; P8BE-NEXT: slwi r6, r10, 16 +; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: or r4, r7, r4 +; P8BE-NEXT: or r3, r6, r3 +; P8BE-NEXT: mtvsrwz v3, r5 +; P8BE-NEXT: mtvsrwz v4, r4 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: vmrgow v3, v4, v3 +; P8BE-NEXT: vmrgow v2, v5, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -724,39 +708,39 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 5 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 5 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -21386 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: srawi r5, r5, 5 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 5 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: srwi 
r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 3 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 3 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: srawi r5, r4, 3 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 3 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: or r3, r3, r4 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_power_of_two: @@ -768,24 +752,21 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 5 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 6 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 6 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: addze r5, r5 +; P9BE-NEXT: slwi r5, r5, 6 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -793,17 +774,17 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 -; 
P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 3 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 3 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: srawi r5, r5, 3 +; P9BE-NEXT: addze r5, r5 +; P9BE-NEXT: slwi r5, r5, 3 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_power_of_two: @@ -813,39 +794,39 @@ ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: clrldi r6, r4, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: mulhw r3, r5, r3 -; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: srawi r8, r6, 6 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: extsh r6, r5 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: addze r8, r8 +; P8LE-NEXT: mulhw r3, r6, r3 +; P8LE-NEXT: srawi r8, r7, 6 +; P8LE-NEXT: rldicl r9, r4, 48, 48 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: srawi r9, r7, 5 ; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: slwi r8, r8, 6 -; P8LE-NEXT: add r3, r3, r5 -; P8LE-NEXT: addze r9, r9 -; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: add r3, r3, r6 +; P8LE-NEXT: addze r6, r8 +; P8LE-NEXT: extsh r8, r9 ; P8LE-NEXT: srwi r10, r3, 31 -; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: slwi r8, r9, 5 -; P8LE-NEXT: mtvsrd v2, r6 +; P8LE-NEXT: srwi r3, r3, 6 +; P8LE-NEXT: slwi r6, r6, 6 ; P8LE-NEXT: add r3, r3, r10 -; P8LE-NEXT: srawi r9, r4, 3 -; P8LE-NEXT: sub r6, r7, r8 +; P8LE-NEXT: srawi r8, r8, 5 +; P8LE-NEXT: sub r6, r7, r6 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: addze r7, r9 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: addze r8, r8 +; P8LE-NEXT: slwi r8, r8, 5 +; P8LE-NEXT: srawi r10, r4, 3 +; P8LE-NEXT: sub r8, r9, r8 +; 
P8LE-NEXT: addze r9, r10 +; P8LE-NEXT: slwi r7, r9, 3 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: slwi r5, r7, 3 -; P8LE-NEXT: sub r4, r4, r5 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: slwi r5, r8, 16 +; P8LE-NEXT: sub r4, r4, r7 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: or r3, r3, r4 +; P8LE-NEXT: mtvsrwz v2, r5 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_power_of_two: @@ -858,38 +839,35 @@ ; P8BE-NEXT: extsh r5, r5 ; P8BE-NEXT: extsh r6, r6 ; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: rldicl r7, r4, 16, 48 -; P8BE-NEXT: srawi r8, r6, 5 -; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: addze r8, r8 +; P8BE-NEXT: srawi r7, r6, 5 +; P8BE-NEXT: rldicl r8, r4, 16, 48 +; P8BE-NEXT: addze r7, r7 +; P8BE-NEXT: extsh r9, r8 ; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: srawi r9, r7, 6 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: slwi r8, r8, 5 +; P8BE-NEXT: slwi r7, r7, 5 +; P8BE-NEXT: srawi r9, r9, 6 +; P8BE-NEXT: sub r6, r6, r7 ; P8BE-NEXT: add r3, r3, r5 ; P8BE-NEXT: addze r9, r9 -; P8BE-NEXT: sub r6, r6, r8 ; P8BE-NEXT: srwi r10, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 -; P8BE-NEXT: slwi r8, r9, 6 -; P8BE-NEXT: mtvsrwz v2, r6 +; P8BE-NEXT: slwi r9, r9, 6 ; P8BE-NEXT: add r3, r3, r10 -; P8BE-NEXT: srawi r9, r4, 3 -; P8BE-NEXT: addis r10, r2, .LCPI3_0@toc@ha -; P8BE-NEXT: sub r6, r7, r8 +; P8BE-NEXT: extsh r10, r4 +; P8BE-NEXT: sub r8, r8, r9 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: addze r8, r9 -; P8BE-NEXT: addi r7, r10, .LCPI3_0@toc@l -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: lxvw4x v3, 0, r7 +; P8BE-NEXT: srawi r10, r10, 3 +; P8BE-NEXT: slwi r7, r8, 16 +; P8BE-NEXT: addze r10, r10 +; P8BE-NEXT: or r6, r7, r6 +; P8BE-NEXT: slwi r9, r10, 3 +; P8BE-NEXT: mtvsrwz v2, r6 +; P8BE-NEXT: sub r4, r4, r9 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: slwi r5, r8, 3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; 
P8BE-NEXT: sub r4, r4, r5 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r3, r4, r3 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v2, v3 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -900,25 +878,25 @@ ; P9LE-LABEL: dont_fold_srem_one: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: lis r4, -14230 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: lis r6, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 30865 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: ori r6, r6, 47143 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 9 +; P9LE-NEXT: srwi r4, r4, 9 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: sub r3, r3, r4 ; P9LE-NEXT: lis r4, -19946 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: slwi r3, r3, 16 ; P9LE-NEXT: ori r4, r4, 17097 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v3, v4 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -927,165 +905,149 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; 
P9LE-NEXT: mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_one: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: lis r4, -14230 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 4 +; P9BE-NEXT: srawi r4, r4, 9 ; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: lis r5, -19946 +; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: ori r5, r5, 17097 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 ; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -14230 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: ori r4, r4, 30865 -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 -; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v3, v4, v5 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: add r4, r5, r4 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 9 +; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: lis r5, 24749 +; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: ori r5, 
r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: or r3, r3, r4 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r5, 24749 -; P8LE-NEXT: lis r6, -19946 +; P8LE-NEXT: lis r5, -19946 +; P8LE-NEXT: lis r6, 24749 ; P8LE-NEXT: lis r8, -14230 -; P8LE-NEXT: ori r5, r5, 47143 -; P8LE-NEXT: ori r6, r6, 17097 +; P8LE-NEXT: ori r5, r5, 17097 +; P8LE-NEXT: ori r6, r6, 47143 ; P8LE-NEXT: ori r8, r8, 30865 ; P8LE-NEXT: mffprd r3, f0 -; P8LE-NEXT: rldicl r4, r3, 16, 48 -; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: rldicl r4, r3, 32, 48 +; P8LE-NEXT: rldicl r7, r3, 16, 48 ; P8LE-NEXT: rldicl r3, r3, 48, 48 ; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: extsh r3, r3 +; P8LE-NEXT: extsh r9, r7 +; P8LE-NEXT: extsh r10, r3 ; P8LE-NEXT: mulhw r5, r4, r5 -; P8LE-NEXT: mulhw r6, r7, r6 -; P8LE-NEXT: mulhw r8, r3, r8 -; P8LE-NEXT: srwi r9, r5, 31 -; P8LE-NEXT: srawi r5, r5, 11 -; P8LE-NEXT: add r6, r6, r7 -; P8LE-NEXT: add r8, r8, r3 -; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: mulhw r6, r9, r6 +; P8LE-NEXT: mulhw r8, r10, r8 +; P8LE-NEXT: add r5, r5, r4 ; P8LE-NEXT: srwi r9, r6, 31 -; P8LE-NEXT: srawi r6, r6, 4 +; P8LE-NEXT: srwi r6, r6, 11 +; P8LE-NEXT: add r8, r8, r10 ; P8LE-NEXT: add r6, r6, r9 +; P8LE-NEXT: srwi r9, r5, 31 +; P8LE-NEXT: srawi r5, r5, 4 +; P8LE-NEXT: mulli r6, r6, 5423 +; P8LE-NEXT: add r5, r5, r9 ; P8LE-NEXT: srwi r9, r8, 31 -; P8LE-NEXT: srawi r8, r8, 9 -; P8LE-NEXT: mulli r5, r5, 5423 +; P8LE-NEXT: srwi r8, 
r8, 9 ; P8LE-NEXT: add r8, r8, r9 -; P8LE-NEXT: mulli r6, r6, 23 -; P8LE-NEXT: li r9, 0 +; P8LE-NEXT: mulli r5, r5, 23 ; P8LE-NEXT: mulli r8, r8, 654 -; P8LE-NEXT: mtvsrd v2, r9 +; P8LE-NEXT: sub r6, r7, r6 ; P8LE-NEXT: sub r4, r4, r5 -; P8LE-NEXT: sub r5, r7, r6 -; P8LE-NEXT: mtvsrd v3, r4 +; P8LE-NEXT: slwi r5, r6, 16 ; P8LE-NEXT: sub r3, r3, r8 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v3, v4 -; P8LE-NEXT: vmrghh v2, v5, v2 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: or r4, r5, r4 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: mtvsrwz v2, r4 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_one: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 24749 -; P8BE-NEXT: lis r7, -19946 -; P8BE-NEXT: lis r8, -14230 -; P8BE-NEXT: ori r3, r3, 47143 -; P8BE-NEXT: ori r7, r7, 17097 -; P8BE-NEXT: ori r8, r8, 30865 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: lis r3, -19946 +; P8BE-NEXT: lis r7, 24749 +; P8BE-NEXT: lis r9, -14230 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: ori r7, r7, 47143 +; P8BE-NEXT: ori r9, r9, 30865 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r8, r4, 48 +; P8BE-NEXT: extsh r6, r5 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsh r8, r8 +; P8BE-NEXT: mulhw r3, r6, r3 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: mulhw r7, r6, r7 -; P8BE-NEXT: mulhw r8, r4, r8 -; P8BE-NEXT: srawi r9, r3, 11 -; P8BE-NEXT: srwi r3, r3, 31 -; P8BE-NEXT: add r7, r7, r6 -; P8BE-NEXT: add r8, r8, r4 -; P8BE-NEXT: add r3, r9, r3 -; P8BE-NEXT: srwi r9, r7, 31 -; P8BE-NEXT: srawi r7, r7, 4 -; P8BE-NEXT: srawi r10, r8, 9 -; P8BE-NEXT: srwi r8, r8, 31 -; P8BE-NEXT: add r7, r7, r9 -; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: add r8, r10, r8 -; P8BE-NEXT: li r10, 0 -; P8BE-NEXT: mulli r7, r7, 23 
-; P8BE-NEXT: mulli r8, r8, 654 -; P8BE-NEXT: mtvsrwz v2, r10 +; P8BE-NEXT: mulhw r7, r8, r7 +; P8BE-NEXT: mulhw r9, r4, r9 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r3, 31 +; P8BE-NEXT: srwi r3, r3, 4 +; P8BE-NEXT: add r9, r9, r4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r7, 31 +; P8BE-NEXT: srawi r7, r7, 11 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: add r6, r7, r6 +; P8BE-NEXT: srwi r7, r9, 31 +; P8BE-NEXT: srawi r9, r9, 9 +; P8BE-NEXT: mulli r6, r6, 5423 +; P8BE-NEXT: add r7, r9, r7 +; P8BE-NEXT: mulli r7, r7, 654 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r6, r7 -; P8BE-NEXT: mtvsrwz v4, r3 -; P8BE-NEXT: sub r3, r4, r8 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: vmrghw v2, v2, v4 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r5, r8, r6 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r4, r4, r7 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1097,8 +1059,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: lis r6, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: ori r6, r6, 47143 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -1107,161 +1071,143 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, 
r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 15 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 15 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v3, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_i16_smax: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: lis r5, -19946 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: ori r5, r5, 17097 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: add r4, r5, r4 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 4 +; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, 24749 ; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: or r3, r3, r4 ; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw 
r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v3, v4, v5 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 15 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_i16_smax: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, 24749 -; P8LE-NEXT: lis r5, -19946 -; P8LE-NEXT: ori r4, r4, 47143 -; P8LE-NEXT: ori r5, r5, 17097 +; P8LE-NEXT: lis r5, 24749 +; P8LE-NEXT: lis r4, -19946 +; P8LE-NEXT: ori r5, r5, 47143 +; P8LE-NEXT: ori r4, r4, 17097 ; P8LE-NEXT: mffprd r3, f0 -; P8LE-NEXT: rldicl r6, r3, 16, 48 -; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: rldicl r7, r3, 16, 48 +; P8LE-NEXT: rldicl r6, r3, 32, 48 +; P8LE-NEXT: extsh r8, r7 ; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: mulhw r5, r8, r5 ; P8LE-NEXT: mulhw r4, r6, r4 -; P8LE-NEXT: mulhw r5, r7, r5 ; P8LE-NEXT: rldicl r3, r3, 48, 48 -; P8LE-NEXT: extsh r3, r3 -; P8LE-NEXT: srwi r8, r4, 31 -; P8LE-NEXT: srawi r4, r4, 11 -; P8LE-NEXT: add r5, r5, r7 -; P8LE-NEXT: add r4, r4, r8 ; P8LE-NEXT: srwi r8, r5, 31 -; P8LE-NEXT: srawi r5, r5, 4 -; P8LE-NEXT: mulli r4, r4, 5423 +; P8LE-NEXT: srwi r5, r5, 11 +; P8LE-NEXT: add r4, r4, r6 ; P8LE-NEXT: add r5, r5, r8 -; P8LE-NEXT: srawi r9, r3, 15 -; P8LE-NEXT: li r8, 0 -; P8LE-NEXT: mulli r5, r5, 23 -; P8LE-NEXT: mtvsrd v2, r8 -; P8LE-NEXT: sub r4, r6, r4 -; P8LE-NEXT: addze r6, r9 -; P8LE-NEXT: slwi r6, r6, 15 -; P8LE-NEXT: mtvsrd v3, r4 +; 
P8LE-NEXT: srwi r8, r4, 31 +; P8LE-NEXT: srawi r4, r4, 4 +; P8LE-NEXT: mulli r5, r5, 5423 +; P8LE-NEXT: add r4, r4, r8 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: mulli r4, r4, 23 +; P8LE-NEXT: srawi r8, r8, 15 ; P8LE-NEXT: sub r5, r7, r5 -; P8LE-NEXT: sub r3, r3, r6 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v3, v4 -; P8LE-NEXT: vmrghh v2, v5, v2 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: addze r7, r8 +; P8LE-NEXT: slwi r7, r7, 15 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r6, r4 +; P8LE-NEXT: sub r3, r3, r7 +; P8LE-NEXT: or r4, r5, r4 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: mtvsrwz v2, r4 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_i16_smax: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r3, v2 -; P8BE-NEXT: lis r4, 24749 -; P8BE-NEXT: lis r5, -19946 -; P8BE-NEXT: li r9, 0 -; P8BE-NEXT: ori r4, r4, 47143 -; P8BE-NEXT: ori r5, r5, 17097 -; P8BE-NEXT: mtvsrwz v2, r9 -; P8BE-NEXT: clrldi r6, r3, 48 -; P8BE-NEXT: rldicl r7, r3, 48, 48 -; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r4, r6, r4 -; P8BE-NEXT: mulhw r5, r7, r5 -; P8BE-NEXT: rldicl r3, r3, 32, 48 -; P8BE-NEXT: extsh r3, r3 -; P8BE-NEXT: srwi r8, r4, 31 -; P8BE-NEXT: srawi r4, r4, 11 -; P8BE-NEXT: add r5, r5, r7 -; P8BE-NEXT: add r4, r4, r8 -; P8BE-NEXT: srwi r8, r5, 31 -; P8BE-NEXT: srawi r5, r5, 4 -; P8BE-NEXT: mulli r4, r4, 5423 -; P8BE-NEXT: add r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha -; P8BE-NEXT: srawi r10, r3, 15 -; P8BE-NEXT: mulli r5, r5, 23 -; P8BE-NEXT: sub r4, r6, r4 -; P8BE-NEXT: addi r6, r8, .LCPI5_0@toc@l -; P8BE-NEXT: addze r8, r10 -; P8BE-NEXT: lxvw4x v3, 0, r6 -; P8BE-NEXT: slwi r6, r8, 15 -; P8BE-NEXT: mtvsrwz v4, r4 -; P8BE-NEXT: sub r5, r7, r5 -; P8BE-NEXT: sub r3, r3, r6 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: vmrghw v2, v2, v4 +; 
P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -19946 +; P8BE-NEXT: lis r7, 24749 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: ori r7, r7, 47143 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r8, r4, 48 +; P8BE-NEXT: extsh r6, r5 +; P8BE-NEXT: extsh r8, r8 +; P8BE-NEXT: mulhw r3, r6, r3 +; P8BE-NEXT: mulhw r7, r8, r7 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r3, 31 +; P8BE-NEXT: srwi r3, r3, 4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r7, 31 +; P8BE-NEXT: srawi r7, r7, 11 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: add r6, r7, r6 +; P8BE-NEXT: mulli r6, r6, 5423 +; P8BE-NEXT: sub r3, r5, r3 +; P8BE-NEXT: srawi r5, r4, 15 +; P8BE-NEXT: addze r5, r5 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r6, r8, r6 +; P8BE-NEXT: slwi r5, r5, 15 +; P8BE-NEXT: or r3, r3, r6 +; P8BE-NEXT: sub r4, r4, r5 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 Index: llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -13,37 +13,28 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, 21399 -; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: lis r6, 16727 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 33437 -; P9LE-NEXT: ori r5, r5, 16913 +; P9LE-NEXT: ori r6, r6, 2287 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 16727 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 2287 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: srwi r4, r4, 8 -; P9LE-NEXT: mulli r4, r4, 1003 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd 
v4, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31 -; P9LE-NEXT: mulhwu r3, r3, r5 -; P9LE-NEXT: srwi r3, r3, 2 -; P9LE-NEXT: mulli r3, r3, 124 -; P9LE-NEXT: sub r3, r4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r5, r4, 16 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: lis r6, 8456 +; P9LE-NEXT: ori r6, r6, 16913 +; P9LE-NEXT: srwi r5, r5, 8 +; P9LE-NEXT: mulli r5, r5, 1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 ; P9LE-NEXT: lis r4, 22765 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: vextuhrx r3, r3, v2 @@ -55,163 +46,164 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: rlwinm r5, r4, 30, 18, 31 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: srwi r5, r5, 2 +; P9LE-NEXT: mulli r5, r5, 124 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v3, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: lis r4, 16727 -; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: lis r6, 21399 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 2287 -; P9BE-NEXT: ori r5, r5, 16913 +; P9BE-NEXT: ori r6, r6, 33437 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 8 ; P9BE-NEXT: mulli r4, r4, 1003 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 21399 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: mulhwu r5, r5, r6 +; P9BE-NEXT: lis r6, 22765 +; P9BE-NEXT: ori r6, r6, 8969 +; P9BE-NEXT: 
srwi r5, r5, 5 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: ori r5, r5, 16913 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: srwi r4, r4, 5 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 2 ; P9BE-NEXT: mulli r3, r3, 124 ; P9BE-NEXT: sub r3, r4, r3 -; P9BE-NEXT: lis r4, 22765 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: ori r4, r4, 8969 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: sub r5, r3, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: mulhwu r6, r5, r6 +; P9BE-NEXT: sub r5, r5, r6 ; P9BE-NEXT: srwi r5, r5, 1 -; P9BE-NEXT: add r4, r5, r4 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_1: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 22765 -; P8LE-NEXT: lis r7, 21399 +; P8LE-NEXT: lis r8, 22765 ; P8LE-NEXT: lis r9, 16727 +; P8LE-NEXT: lis r3, 21399 ; P8LE-NEXT: lis r10, 8456 -; P8LE-NEXT: 
ori r3, r3, 8969 -; P8LE-NEXT: ori r7, r7, 33437 +; P8LE-NEXT: ori r8, r8, 8969 ; P8LE-NEXT: ori r9, r9, 2287 +; P8LE-NEXT: ori r3, r3, 33437 ; P8LE-NEXT: ori r10, r10, 16913 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: clrlwi r7, r7, 16 ; P8LE-NEXT: rldicl r5, r4, 32, 48 -; P8LE-NEXT: clrlwi r6, r6, 16 -; P8LE-NEXT: rldicl r8, r4, 16, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: mulhwu r3, r6, r3 +; P8LE-NEXT: clrlwi r11, r6, 16 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: clrlwi r8, r8, 16 +; P8LE-NEXT: mulhwu r8, r7, r8 +; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: mulhwu r9, r11, r9 ; P8LE-NEXT: rlwinm r11, r4, 30, 18, 31 -; P8LE-NEXT: mulhwu r7, r5, r7 -; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: mulhwu r9, r8, r9 +; P8LE-NEXT: mulhwu r3, r5, r3 ; P8LE-NEXT: mulhwu r10, r11, r10 -; P8LE-NEXT: sub r11, r6, r3 -; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: srwi r7, r7, 5 -; P8LE-NEXT: add r3, r11, r3 +; P8LE-NEXT: sub r11, r7, r8 ; P8LE-NEXT: srwi r9, r9, 8 +; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: srwi r3, r3, 5 +; P8LE-NEXT: add r8, r11, r8 ; P8LE-NEXT: srwi r10, r10, 2 -; P8LE-NEXT: srwi r3, r3, 6 -; P8LE-NEXT: mulli r7, r7, 98 ; P8LE-NEXT: mulli r9, r9, 1003 -; P8LE-NEXT: mulli r3, r3, 95 +; P8LE-NEXT: mulli r3, r3, 98 +; P8LE-NEXT: srwi r8, r8, 6 ; P8LE-NEXT: mulli r10, r10, 124 -; P8LE-NEXT: sub r5, r5, r7 -; P8LE-NEXT: sub r7, r8, r9 -; P8LE-NEXT: sub r3, r6, r3 -; P8LE-NEXT: mtvsrd v2, r5 +; P8LE-NEXT: mulli r8, r8, 95 +; P8LE-NEXT: sub r6, r6, r9 +; P8LE-NEXT: sub r3, r5, r3 ; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v3, r7 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: slwi r5, r6, 16 +; P8LE-NEXT: or r3, r5, r3 +; P8LE-NEXT: sub r5, r7, r8 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: mtvsrwz v2, r3 +; P8LE-NEXT: or r3, r4, 
r5 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_urem_vec_1: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 -; P8BE-NEXT: lis r7, 16727 ; P8BE-NEXT: lis r9, 21399 -; P8BE-NEXT: lis r10, 8456 +; P8BE-NEXT: lis r8, 16727 ; P8BE-NEXT: ori r3, r3, 8969 -; P8BE-NEXT: ori r7, r7, 2287 ; P8BE-NEXT: ori r9, r9, 33437 -; P8BE-NEXT: ori r10, r10, 16913 -; P8BE-NEXT: rldicl r6, r4, 16, 48 +; P8BE-NEXT: ori r8, r8, 2287 +; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: clrlwi r10, r7, 16 +; P8BE-NEXT: clrlwi r11, r6, 16 +; P8BE-NEXT: mulhwu r3, r10, r3 ; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: mulhwu r3, r6, r3 -; P8BE-NEXT: rldicl r8, r4, 48, 48 -; P8BE-NEXT: mulhwu r7, r5, r7 +; P8BE-NEXT: mulhwu r9, r11, r9 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: clrlwi r8, r8, 16 -; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 -; P8BE-NEXT: mulhwu r9, r8, r9 +; P8BE-NEXT: lis r11, 8456 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: rlwinm r12, r4, 30, 18, 31 +; P8BE-NEXT: ori r11, r11, 16913 ; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulhwu r10, r11, r10 -; P8BE-NEXT: sub r11, r6, r3 -; P8BE-NEXT: srwi r7, r7, 8 -; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: add r3, r11, r3 -; P8BE-NEXT: mulli r7, r7, 1003 +; P8BE-NEXT: mulhwu r8, r5, r8 +; P8BE-NEXT: mulhwu r11, r12, r11 +; P8BE-NEXT: sub r10, r10, r3 +; P8BE-NEXT: srwi r10, r10, 1 ; P8BE-NEXT: srwi r9, r9, 5 -; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: srwi r10, r10, 2 +; P8BE-NEXT: add r3, r10, r3 ; P8BE-NEXT: mulli r9, r9, 98 +; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: srwi r8, r8, 8 +; P8BE-NEXT: srwi r10, r11, 2 ; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: mulli r8, r8, 1003 ; P8BE-NEXT: mulli r10, r10, 124 -; P8BE-NEXT: sub r5, r5, r7 -; P8BE-NEXT: addis r7, r2, .LCPI0_0@toc@ha -; P8BE-NEXT: mtvsrwz v2, r5 -; P8BE-NEXT: addi r5, r7, .LCPI0_0@toc@l -; 
P8BE-NEXT: sub r8, r8, r9 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r3, r6, r3 +; P8BE-NEXT: sub r6, r6, r9 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: sub r5, r5, r8 +; P8BE-NEXT: slwi r3, r3, 16 ; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v4, r8 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v5, v0, v3 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: or r5, r6, r5 +; P8BE-NEXT: or r3, r3, r4 +; P8BE-NEXT: mtvsrwz v2, r5 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -232,21 +224,21 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r5, r3, r4 -; P9LE-NEXT: sub r6, r3, r5 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r6, r5, 16 +; P9LE-NEXT: mulhwu r7, r6, r4 +; P9LE-NEXT: sub r6, r6, r7 ; P9LE-NEXT: srwi r6, r6, 1 -; P9LE-NEXT: add r5, r6, r5 -; P9LE-NEXT: srwi r5, r5, 6 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; P9LE-NEXT: slwi r5, r5, 16 +; P9LE-NEXT: or r3, r5, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 ; P9LE-NEXT: sub r6, r3, r5 @@ -255,20 +247,20 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: sub r5, r3, r4 -; P9LE-NEXT: srwi r5, r5, 1 -; P9LE-NEXT: add r4, r5, r4 +; 
P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r6, r5, 16 +; P9LE-NEXT: mulhwu r4, r6, r4 +; P9LE-NEXT: sub r6, r6, r4 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r4, r6, r4 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_2: @@ -285,25 +277,22 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r4 -; P9BE-NEXT: sub r6, r3, r5 +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r6, r5, 16 +; P9BE-NEXT: mulhwu r7, r6, r4 +; P9BE-NEXT: sub r6, r6, r7 ; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r5, r6, r5 -; P9BE-NEXT: srwi r5, r5, 6 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: slwi r5, r5, 16 +; P9BE-NEXT: or r3, r5, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: sub r6, r3, r5 ; P9BE-NEXT: srwi r6, r6, 1 @@ -311,20 +300,20 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: sub 
r5, r3, r4 -; P9BE-NEXT: srwi r5, r5, 1 -; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r6, r5, 16 +; P9BE-NEXT: mulhwu r4, r6, r4 +; P9BE-NEXT: sub r6, r6, r4 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r4, r6, r4 ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_2: @@ -333,49 +322,49 @@ ; P8LE-NEXT: lis r3, 22765 ; P8LE-NEXT: ori r3, r3, 8969 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: clrlwi r6, r6, 16 -; P8LE-NEXT: mulhwu r8, r5, r3 ; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r6, 16 +; P8LE-NEXT: clrlwi r5, r5, 16 +; P8LE-NEXT: clrlwi r9, r4, 16 +; P8LE-NEXT: mulhwu r10, r8, r3 ; P8LE-NEXT: clrlwi r7, r7, 16 -; P8LE-NEXT: mulhwu r9, r6, r3 -; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: mulhwu r10, r7, r3 -; P8LE-NEXT: mulhwu r3, r4, r3 -; P8LE-NEXT: sub r11, r5, r8 -; P8LE-NEXT: sub r12, r6, r9 -; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: add r8, r11, r8 -; P8LE-NEXT: sub r11, r7, r10 -; P8LE-NEXT: srwi r12, r12, 1 -; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: sub r12, r4, r3 -; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: mulhwu r12, r9, r3 +; P8LE-NEXT: mulhwu r11, r5, r3 +; P8LE-NEXT: mulhwu r3, r7, r3 +; P8LE-NEXT: sub r8, r8, r10 +; P8LE-NEXT: sub r9, r9, r12 +; P8LE-NEXT: srwi r8, r8, 1 +; P8LE-NEXT: sub r0, r5, r11 +; P8LE-NEXT: add r8, r8, r10 +; P8LE-NEXT: sub r10, r7, r3 +; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: srwi r0, r0, 1 +; P8LE-NEXT: add r9, r9, r12 +; P8LE-NEXT: srwi r10, r10, 1 +; P8LE-NEXT: add r11, r0, r11 ; 
P8LE-NEXT: srwi r8, r8, 6 -; P8LE-NEXT: add r10, r11, r10 -; P8LE-NEXT: srwi r11, r12, 1 +; P8LE-NEXT: add r3, r10, r3 ; P8LE-NEXT: srwi r9, r9, 6 -; P8LE-NEXT: add r3, r11, r3 +; P8LE-NEXT: srwi r10, r11, 6 ; P8LE-NEXT: mulli r8, r8, 95 -; P8LE-NEXT: srwi r10, r10, 6 ; P8LE-NEXT: srwi r3, r3, 6 ; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r10, r10, 95 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: sub r5, r5, r8 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v2, r5 -; P8LE-NEXT: sub r5, r7, r10 -; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: sub r5, r5, r10 +; P8LE-NEXT: slwi r6, r6, 16 +; P8LE-NEXT: sub r3, r7, r3 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: or r5, r6, r5 +; P8LE-NEXT: or r3, r4, r3 +; P8LE-NEXT: mtvsrwz v2, r5 +; P8LE-NEXT: mtvsrwz v3, r3 +; P8LE-NEXT: vmrghw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_urem_vec_2: @@ -383,52 +372,49 @@ ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 ; P8BE-NEXT: ori r3, r3, 8969 -; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: mulhwu r8, r5, r3 ; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: clrlwi r8, r6, 16 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrlwi r9, r4, 16 +; P8BE-NEXT: mulhwu r10, r8, r3 ; P8BE-NEXT: clrlwi r7, r7, 16 -; P8BE-NEXT: mulhwu r9, r6, r3 -; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulhwu r10, r7, r3 -; P8BE-NEXT: mulhwu r3, r4, r3 -; P8BE-NEXT: sub r11, r5, r8 -; P8BE-NEXT: sub r12, r6, r9 -; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: add r8, r11, r8 -; P8BE-NEXT: sub r11, r7, r10 -; P8BE-NEXT: srwi r12, r12, 1 -; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: sub r12, r4, r3 -; 
P8BE-NEXT: srwi r11, r11, 1 +; P8BE-NEXT: mulhwu r12, r9, r3 +; P8BE-NEXT: mulhwu r11, r5, r3 +; P8BE-NEXT: mulhwu r3, r7, r3 +; P8BE-NEXT: sub r8, r8, r10 +; P8BE-NEXT: sub r9, r9, r12 +; P8BE-NEXT: srwi r8, r8, 1 +; P8BE-NEXT: sub r0, r5, r11 +; P8BE-NEXT: add r8, r8, r10 +; P8BE-NEXT: sub r10, r7, r3 +; P8BE-NEXT: srwi r9, r9, 1 +; P8BE-NEXT: srwi r0, r0, 1 +; P8BE-NEXT: add r9, r9, r12 +; P8BE-NEXT: srwi r10, r10, 1 +; P8BE-NEXT: add r11, r0, r11 ; P8BE-NEXT: srwi r8, r8, 6 -; P8BE-NEXT: add r10, r11, r10 -; P8BE-NEXT: srwi r11, r12, 1 +; P8BE-NEXT: add r3, r10, r3 ; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: srwi r10, r11, 6 ; P8BE-NEXT: mulli r8, r8, 95 -; P8BE-NEXT: add r3, r11, r3 -; P8BE-NEXT: srwi r10, r10, 6 ; P8BE-NEXT: srwi r3, r3, 6 ; P8BE-NEXT: mulli r9, r9, 95 ; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sub r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: sub r4, r4, r9 +; P8BE-NEXT: sub r5, r5, r10 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r5, r6, r5 +; P8BE-NEXT: or r3, r4, r3 ; P8BE-NEXT: mtvsrwz v2, r5 -; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l -; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r7, r10 -; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -439,68 +425,68 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-LABEL: combine_urem_udiv: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, 22765 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 8969 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r5, r3, r4 -; P9LE-NEXT: sub r6, r3, r5 -; P9LE-NEXT: 
srwi r6, r6, 1 -; P9LE-NEXT: add r5, r6, r5 -; P9LE-NEXT: srwi r5, r5, 6 -; P9LE-NEXT: mulli r6, r5, 95 -; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, 22765 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r6, r3, 16 -; P9LE-NEXT: mulhwu r7, r6, r4 -; P9LE-NEXT: sub r6, r6, r7 -; P9LE-NEXT: srwi r6, r6, 1 -; P9LE-NEXT: add r6, r6, r7 -; P9LE-NEXT: srwi r6, r6, 6 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r7, r3, 16 -; P9LE-NEXT: mulhwu r8, r7, r4 -; P9LE-NEXT: sub r7, r7, r8 -; P9LE-NEXT: srwi r7, r7, 1 -; P9LE-NEXT: add r7, r7, r8 +; P9LE-NEXT: ori r5, r5, 8969 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r6, r4, r5 +; P9LE-NEXT: sub r4, r4, r6 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: srwi r6, r4, 6 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r3, r3, r6 +; P9LE-NEXT: li r6, 0 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: clrlwi r6, r6, 16 +; P9LE-NEXT: mulhwu r7, r6, r5 +; P9LE-NEXT: sub r8, r6, r7 +; P9LE-NEXT: srwi r8, r8, 1 +; P9LE-NEXT: add r7, r8, r7 ; P9LE-NEXT: srwi r7, r7, 6 ; P9LE-NEXT: mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: or r3, r3, r6 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r8, r3, 16 -; P9LE-NEXT: mulhwu r4, r8, r4 -; P9LE-NEXT: sub r8, r8, r4 -; P9LE-NEXT: srwi r8, r8, 1 -; P9LE-NEXT: add r4, r8, r4 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r8, r4, 95 -; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: clrlwi r6, r3, 16 +; P9LE-NEXT: mulhwu r8, r6, r5 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r6, r6, r8 +; P9LE-NEXT: srwi r8, r6, 6 +; P9LE-NEXT: mulli r8, r8, 95 ; P9LE-NEXT: sub r3, r3, r8 -; 
P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 4 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: clrlwi r9, r8, 16 +; P9LE-NEXT: mulhwu r5, r9, r5 +; P9LE-NEXT: sub r9, r9, r5 +; P9LE-NEXT: srwi r9, r9, 1 +; P9LE-NEXT: add r5, r9, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r9, r5, 95 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: or r3, r3, r8 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: rlwinm r3, r4, 10, 0, 15 +; P9LE-NEXT: or r3, r3, r7 +; P9LE-NEXT: vmrghw v2, v2, v3 +; P9LE-NEXT: mtvsrwz v3, r3 +; P9LE-NEXT: rlwinm r3, r6, 10, 0, 15 +; P9LE-NEXT: or r3, r3, r5 +; P9LE-NEXT: mtvsrwz v4, r3 +; P9LE-NEXT: vmrghw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: combine_urem_udiv: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: lis r5, 22765 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 8969 @@ -509,56 +495,53 @@ ; P9BE-NEXT: sub r4, r4, r6 ; P9BE-NEXT: srwi r4, r4, 1 ; P9BE-NEXT: add r4, r4, r6 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r6, r4, 95 +; P9BE-NEXT: srwi r6, r4, 6 +; P9BE-NEXT: mulli r6, r6, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r6, r3, 16 -; P9BE-NEXT: mulhwu r7, r6, r5 -; P9BE-NEXT: sub r6, r6, r7 -; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r6, r6, r7 -; P9BE-NEXT: srwi r6, r6, 6 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r7, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; 
P9BE-NEXT: li r6, 6 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: clrlwi r7, r6, 16 ; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: sub r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 1 ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 6 ; P9BE-NEXT: mulli r8, r7, 95 -; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: or r3, r3, r6 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r5 -; P9BE-NEXT: sub r8, r3, r5 -; P9BE-NEXT: srwi r8, r8, 1 -; P9BE-NEXT: add r5, r8, r5 -; P9BE-NEXT: srwi r5, r5, 6 -; P9BE-NEXT: mulli r8, r5, 95 -; P9BE-NEXT: mtvsrwz v0, r5 +; P9BE-NEXT: clrlwi r6, r3, 16 +; P9BE-NEXT: mulhwu r8, r6, r5 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r6, r6, r8 +; P9BE-NEXT: srwi r8, r6, 6 +; P9BE-NEXT: mulli r8, r8, 95 ; P9BE-NEXT: sub r3, r3, r8 +; P9BE-NEXT: li r8, 2 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: clrlwi r9, r8, 16 +; P9BE-NEXT: mulhwu r5, r9, r5 +; P9BE-NEXT: sub r9, r9, r5 +; P9BE-NEXT: srwi r9, r9, 1 +; P9BE-NEXT: add r5, r9, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: or r3, r3, r8 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrwz v3, r4 -; P9BE-NEXT: vperm v3, v4, v3, v5 -; P9BE-NEXT: mtvsrwz v4, r7 -; P9BE-NEXT: vperm v4, v0, v4, v5 -; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: rlwinm r3, r4, 10, 0, 15 +; P9BE-NEXT: or r3, r3, r7 +; P9BE-NEXT: vmrgow v2, v2, v3 +; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: rlwinm r3, r6, 10, 0, 15 +; P9BE-NEXT: or r3, r3, r5 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vmrgow v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -569,57 +552,57 @@ ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: 
ori r3, r3, 8969 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r8, r6, 16 -; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: mulhwu r9, r5, r3 -; P8LE-NEXT: mulhwu r11, r8, r3 -; P8LE-NEXT: clrlwi r10, r7, 16 -; P8LE-NEXT: clrlwi r12, r4, 16 -; P8LE-NEXT: mulhwu r0, r10, r3 -; P8LE-NEXT: mulhwu r3, r12, r3 -; P8LE-NEXT: sub r30, r5, r9 -; P8LE-NEXT: sub r8, r8, r11 -; P8LE-NEXT: srwi r30, r30, 1 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r5, 16 +; P8LE-NEXT: clrlwi r6, r6, 16 +; P8LE-NEXT: rldicl r4, r4, 32, 48 +; P8LE-NEXT: clrlwi r9, r7, 16 +; P8LE-NEXT: mulhwu r10, r8, r3 +; P8LE-NEXT: clrlwi r11, r4, 16 +; P8LE-NEXT: mulhwu r12, r6, r3 +; P8LE-NEXT: mulhwu r0, r9, r3 +; P8LE-NEXT: mulhwu r3, r11, r3 +; P8LE-NEXT: sub r8, r8, r10 +; P8LE-NEXT: sub r30, r6, r12 +; P8LE-NEXT: sub r9, r9, r0 ; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: sub r10, r10, r0 -; P8LE-NEXT: add r9, r30, r9 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: sub r11, r12, r3 -; P8LE-NEXT: srwi r10, r10, 1 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: srwi r9, r9, 6 -; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: srwi r8, r8, 6 -; P8LE-NEXT: add r10, r10, r0 -; P8LE-NEXT: mulli r12, r9, 95 -; P8LE-NEXT: add r3, r11, r3 -; P8LE-NEXT: mtvsrd v2, r9 -; P8LE-NEXT: srwi r10, r10, 6 -; P8LE-NEXT: mulli r9, r8, 95 +; P8LE-NEXT: sub r11, r11, r3 +; P8LE-NEXT: srwi r30, r30, 1 +; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: add r8, r8, r10 +; P8LE-NEXT: srwi r10, r11, 1 +; P8LE-NEXT: add r11, r30, r12 +; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: srwi r12, r8, 6 +; P8LE-NEXT: rlwinm r8, r8, 10, 0, 15 +; P8LE-NEXT: add r3, r10, r3 +; P8LE-NEXT: srwi r10, r11, 6 +; P8LE-NEXT: srwi r11, r9, 6 +; P8LE-NEXT: mulli r12, r12, 95 ; P8LE-NEXT: srwi r3, r3, 6 -; P8LE-NEXT: mtvsrd v3, r8 -; 
P8LE-NEXT: mulli r8, r10, 95 -; P8LE-NEXT: mtvsrd v4, r10 -; P8LE-NEXT: mulli r10, r3, 95 -; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: or r8, r8, r10 +; P8LE-NEXT: mulli r0, r10, 95 +; P8LE-NEXT: mulli r11, r11, 95 +; P8LE-NEXT: mtvsrwz v2, r8 +; P8LE-NEXT: mulli r30, r3, 95 ; P8LE-NEXT: sub r5, r5, r12 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: mtvsrd v5, r6 -; P8LE-NEXT: sub r5, r7, r8 -; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v0, r5 -; P8LE-NEXT: mtvsrd v1, r4 -; P8LE-NEXT: vmrghh v3, v5, v3 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v0, v1, v0 -; P8LE-NEXT: vmrghh v4, v5, v4 -; P8LE-NEXT: vmrglw v3, v0, v3 -; P8LE-NEXT: vmrglw v2, v4, v2 +; P8LE-NEXT: sub r6, r6, r0 +; P8LE-NEXT: sub r7, r7, r11 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r4, r30 +; P8LE-NEXT: slwi r7, r7, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: rlwinm r6, r9, 10, 0, 15 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: or r4, r7, r4 +; P8LE-NEXT: or r3, r6, r3 +; P8LE-NEXT: mtvsrwz v3, r5 +; P8LE-NEXT: mtvsrwz v4, r4 +; P8LE-NEXT: mtvsrwz v5, r3 +; P8LE-NEXT: vmrghw v3, v4, v3 +; P8LE-NEXT: vmrghw v2, v5, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -627,61 +610,60 @@ ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8BE-NEXT: ori r3, r3, 8969 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r6, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: clrlwi r8, r5, 16 ; P8BE-NEXT: clrlwi r9, r6, 16 -; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: rldicl r4, r4, 16, 48 -; P8BE-NEXT: mulhwu r10, r8, r3 -; P8BE-NEXT: mulhwu r12, r9, r3 -; P8BE-NEXT: clrlwi r11, r7, 16 -; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulhwu r0, r11, r3 -; P8BE-NEXT: mulhwu r3, r4, r3 -; P8BE-NEXT: sub r8, r8, r10 -; P8BE-NEXT: sub r9, r9, r12 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; 
P8BE-NEXT: clrlwi r10, r7, 16 +; P8BE-NEXT: mulhwu r11, r8, r3 +; P8BE-NEXT: clrlwi r12, r4, 16 +; P8BE-NEXT: mulhwu r0, r9, r3 +; P8BE-NEXT: mulhwu r30, r10, r3 +; P8BE-NEXT: mulhwu r3, r12, r3 +; P8BE-NEXT: sub r8, r8, r11 +; P8BE-NEXT: sub r9, r9, r0 +; P8BE-NEXT: sub r10, r10, r30 ; P8BE-NEXT: srwi r8, r8, 1 +; P8BE-NEXT: sub r12, r12, r3 ; P8BE-NEXT: srwi r9, r9, 1 -; P8BE-NEXT: sub r11, r11, r0 -; P8BE-NEXT: add r8, r8, r10 -; P8BE-NEXT: add r9, r9, r12 -; P8BE-NEXT: sub r12, r4, r3 -; P8BE-NEXT: addis r10, r2, .LCPI2_0@toc@ha -; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: srwi r8, r8, 6 -; P8BE-NEXT: srwi r12, r12, 1 +; P8BE-NEXT: srwi r10, r10, 1 +; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: srwi r11, r12, 1 +; P8BE-NEXT: add r9, r9, r0 +; P8BE-NEXT: add r10, r10, r30 +; P8BE-NEXT: srwi r12, r8, 6 +; P8BE-NEXT: rlwinm r8, r8, 10, 0, 15 +; P8BE-NEXT: add r3, r11, r3 ; P8BE-NEXT: srwi r9, r9, 6 -; P8BE-NEXT: addi r10, r10, .LCPI2_0@toc@l -; P8BE-NEXT: add r11, r11, r0 -; P8BE-NEXT: mulli r0, r8, 95 -; P8BE-NEXT: add r3, r12, r3 -; P8BE-NEXT: mtvsrwz v3, r8 -; P8BE-NEXT: lxvw4x v2, 0, r10 -; P8BE-NEXT: srwi r10, r11, 6 -; P8BE-NEXT: mulli r8, r9, 95 +; P8BE-NEXT: srwi r11, r10, 6 +; P8BE-NEXT: mulli r12, r12, 95 ; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: mtvsrwz v4, r9 -; P8BE-NEXT: mulli r9, r10, 95 -; P8BE-NEXT: mtvsrwz v5, r10 -; P8BE-NEXT: mulli r10, r3, 95 -; P8BE-NEXT: vperm v3, v4, v3, v2 -; P8BE-NEXT: sub r5, r5, r0 -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: mtvsrwz v4, r5 -; P8BE-NEXT: mtvsrwz v0, r6 -; P8BE-NEXT: sub r5, r7, r9 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v1, r5 -; P8BE-NEXT: mtvsrwz v6, r4 -; P8BE-NEXT: vperm v4, v0, v4, v2 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v1, v6, v1, v2 -; P8BE-NEXT: vperm v2, v0, v5, v2 -; P8BE-NEXT: vmrghw v4, v1, v4 -; P8BE-NEXT: vmrghw v2, v2, v3 -; P8BE-NEXT: vadduhm v2, v4, v2 +; P8BE-NEXT: or r8, r8, r9 +; P8BE-NEXT: mulli r0, r9, 95 +; P8BE-NEXT: mulli r11, r11, 95 +; P8BE-NEXT: mtvsrwz 
v2, r8 +; P8BE-NEXT: mulli r30, r3, 95 +; P8BE-NEXT: sub r5, r5, r12 +; P8BE-NEXT: sub r6, r6, r0 +; P8BE-NEXT: sub r7, r7, r11 +; P8BE-NEXT: slwi r5, r5, 16 +; P8BE-NEXT: sub r4, r4, r30 +; P8BE-NEXT: slwi r7, r7, 16 +; P8BE-NEXT: or r5, r5, r6 +; P8BE-NEXT: rlwinm r6, r10, 10, 0, 15 +; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: or r4, r7, r4 +; P8BE-NEXT: or r3, r6, r3 +; P8BE-NEXT: mtvsrwz v3, r5 +; P8BE-NEXT: mtvsrwz v4, r4 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: vmrgow v3, v4, v3 +; P8BE-NEXT: vmrgow v2, v5, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -693,55 +675,47 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_urem_power_of_two: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, 22765 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 8969 -; P9LE-NEXT: clrlwi r3, r3, 26 -; P9LE-NEXT: mtvsrd v3, r3 +; P9LE-NEXT: li r4, 0 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: vextuhrx r4, r4, v2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 27 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: ori r5, r5, 8969 +; P9LE-NEXT: clrlwi r4, r4, 26 +; P9LE-NEXT: rlwimi r4, r3, 16, 11, 15 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: sub r5, r3, r4 -; P9LE-NEXT: srwi r5, r5, 1 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: mtvsrwz v3, r4 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r5, r4, r5 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 29 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li 
r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 29 +; P9LE-NEXT: rlwimi r4, r3, 16, 0, 15 +; P9LE-NEXT: mtvsrwz v2, r4 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_power_of_two: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: lis r4, 22765 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 8969 -; P9BE-NEXT: clrlwi r3, r3, 27 -; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: li r4, 2 ; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 26 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 +; P9BE-NEXT: clrlwi r4, r4, 27 +; P9BE-NEXT: rlwimi r4, r3, 16, 10, 15 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: mtvsrwz v3, r4 +; P9BE-NEXT: lis r4, 22765 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 ; P9BE-NEXT: srwi r5, r5, 1 @@ -749,13 +723,12 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 29 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: rlwinm r4, r4, 16, 13, 15 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_power_of_two: @@ -766,58 +739,49 @@ ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 ; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: mulhwu r3, r5, r3 -; P8LE-NEXT: sub r6, r5, r3 +; P8LE-NEXT: clrlwi r6, r5, 16 +; P8LE-NEXT: mulhwu r3, r6, r3 +; P8LE-NEXT: sub r6, r6, r3 ; P8LE-NEXT: srwi r6, r6, 1 ; P8LE-NEXT: add r3, r6, r3 -; P8LE-NEXT: 
clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 32, 48 ; P8LE-NEXT: srwi r3, r3, 6 -; P8LE-NEXT: clrlwi r6, r6, 26 +; P8LE-NEXT: clrldi r4, r4, 48 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: mtvsrd v2, r6 -; P8LE-NEXT: clrlwi r6, r7, 27 -; P8LE-NEXT: clrlwi r4, r4, 29 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: clrlwi r4, r4, 26 +; P8LE-NEXT: rlwimi r4, r7, 16, 11, 15 +; P8LE-NEXT: mtvsrwz v3, r4 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: vmrghh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: clrlwi r5, r6, 29 +; P8LE-NEXT: rlwimi r5, r3, 16, 0, 15 +; P8LE-NEXT: mtvsrwz v2, r5 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_power_of_two: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 -; P8BE-NEXT: addis r7, r2, .LCPI3_0@toc@ha ; P8BE-NEXT: ori r3, r3, 8969 ; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r8, r4, 16, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: mulhwu r3, r5, r3 ; P8BE-NEXT: sub r6, r5, r3 ; P8BE-NEXT: srwi r6, r6, 1 ; P8BE-NEXT: add r3, r6, r3 -; P8BE-NEXT: rldicl r6, r4, 32, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 ; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: clrlwi r6, r6, 27 +; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: mtvsrwz v2, r6 -; P8BE-NEXT: addi r6, r7, .LCPI3_0@toc@l -; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: clrlwi r7, r8, 26 -; P8BE-NEXT: lxvw4x v3, 0, r6 -; P8BE-NEXT: clrlwi r4, r4, 29 -; P8BE-NEXT: mtvsrwz v4, r7 -; P8BE-NEXT: mtvsrwz v0, r4 +; P8BE-NEXT: clrlwi r4, r4, 27 +; P8BE-NEXT: rlwimi r4, r7, 16, 10, 15 +; P8BE-NEXT: mtvsrwz v3, r4 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: rlwinm r5, r6, 16, 13, 15 +; P8BE-NEXT: or r3, r5, r3 +; 
P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -829,163 +793,145 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, -19946 -; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: lis r6, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 -; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: ori r6, r6, 47143 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 4 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: srwi r4, r4, 11 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r5, r4, 16 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtvsrwz v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31 -; P9LE-NEXT: mulhwu r3, r3, r5 -; P9LE-NEXT: srwi r3, r3, 8 -; P9LE-NEXT: mulli r3, r3, 654 -; P9LE-NEXT: sub r3, r4, r3 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 8 +; P9LE-NEXT: mulli r4, r4, 654 +; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: mtvsrwz v2, r3 +; P9LE-NEXT: vmrghw v2, v3, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_one: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 
; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: lis r6, -19946 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: ori r5, r5, 30865 +; P9BE-NEXT: ori r6, r6, 17097 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 11 ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: mulhwu r5, r5, r6 +; P9BE-NEXT: srwi r5, r5, 4 +; P9BE-NEXT: mulli r5, r5, 23 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: ori r5, r5, 30865 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: mtvsrwz v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: srwi r4, r4, 4 -; P9BE-NEXT: mulli r4, r4, 23 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 8 ; P9BE-NEXT: mulli r3, r3, 654 ; P9BE-NEXT: sub r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, -14230 +; P8LE-NEXT: lis r3, 24749 ; P8LE-NEXT: lis r7, -19946 -; P8LE-NEXT: lis r9, 24749 -; P8LE-NEXT: ori r3, r3, 30865 +; P8LE-NEXT: lis r9, -14230 +; P8LE-NEXT: ori r3, r3, 47143 ; P8LE-NEXT: ori r7, r7, 17097 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: rldicl r5, r4, 48, 48 -; 
P8LE-NEXT: rldicl r6, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: rlwinm r8, r5, 31, 17, 31 -; P8LE-NEXT: clrlwi r6, r6, 16 +; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: rldicl r5, r4, 32, 48 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: clrlwi r8, r6, 16 ; P8LE-NEXT: clrlwi r5, r5, 16 ; P8LE-NEXT: mulhwu r3, r8, r3 -; P8LE-NEXT: ori r8, r9, 47143 -; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: li r9, 0 -; P8LE-NEXT: mulhwu r7, r6, r7 -; P8LE-NEXT: mulhwu r8, r4, r8 -; P8LE-NEXT: mtvsrd v2, r9 -; P8LE-NEXT: srwi r3, r3, 8 +; P8LE-NEXT: ori r8, r9, 30865 +; P8LE-NEXT: rlwinm r9, r4, 31, 17, 31 +; P8LE-NEXT: mulhwu r7, r5, r7 +; P8LE-NEXT: mulhwu r8, r9, r8 +; P8LE-NEXT: srwi r3, r3, 11 ; P8LE-NEXT: srwi r7, r7, 4 -; P8LE-NEXT: mulli r3, r3, 654 -; P8LE-NEXT: srwi r8, r8, 11 +; P8LE-NEXT: mulli r3, r3, 5423 +; P8LE-NEXT: srwi r8, r8, 8 ; P8LE-NEXT: mulli r7, r7, 23 -; P8LE-NEXT: mulli r8, r8, 5423 -; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: sub r5, r6, r7 -; P8LE-NEXT: mtvsrd v3, r3 -; P8LE-NEXT: sub r3, r4, r8 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: mulli r8, r8, 654 +; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: sub r5, r5, r7 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: sub r4, r4, r8 +; P8LE-NEXT: or r3, r3, r5 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: mtvsrwz v2, r3 +; P8LE-NEXT: mtvsrwz v3, r4 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_one: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 24749 -; P8BE-NEXT: lis r7, -19946 +; P8BE-NEXT: lis r3, -19946 +; P8BE-NEXT: lis r6, 24749 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: ori r6, r6, 47143 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r7, r4, 48 +; P8BE-NEXT: clrlwi r8, r5, 16 +; P8BE-NEXT: clrlwi r7, r7, 16 +; P8BE-NEXT: mulhwu r3, r8, r3 ; P8BE-NEXT: lis r8, -14230 -; P8BE-NEXT: li r10, 0 -; 
P8BE-NEXT: ori r3, r3, 47143 -; P8BE-NEXT: ori r7, r7, 17097 -; P8BE-NEXT: ori r8, r8, 30865 -; P8BE-NEXT: mtvsrwz v2, r10 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: mulhwu r3, r5, r3 +; P8BE-NEXT: mulhwu r6, r7, r6 +; P8BE-NEXT: ori r8, r8, 30865 ; P8BE-NEXT: rlwinm r9, r4, 31, 17, 31 -; P8BE-NEXT: mulhwu r7, r6, r7 +; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulhwu r8, r9, r8 -; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha -; P8BE-NEXT: srwi r3, r3, 11 -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: srwi r7, r7, 4 +; P8BE-NEXT: srwi r3, r3, 4 +; P8BE-NEXT: srwi r6, r6, 11 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: mulli r6, r6, 5423 ; P8BE-NEXT: srwi r8, r8, 8 -; P8BE-NEXT: mulli r7, r7, 23 ; P8BE-NEXT: mulli r8, r8, 654 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l -; P8BE-NEXT: mtvsrwz v4, r3 -; P8BE-NEXT: clrlwi r3, r4, 16 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r6, r7 -; P8BE-NEXT: sub r3, r3, r8 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: vmrghw v2, v2, v4 +; P8BE-NEXT: sub r5, r7, r6 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r4, r4, r8 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 Index: llvm/test/CodeGen/PowerPC/vec-trunc2.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec-trunc2.ll +++ llvm/test/CodeGen/PowerPC/vec-trunc2.ll @@ -115,33 +115,35 @@ define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) { ; CHECK-LABEL: test8x24: ; CHECK: # %bb.0: -; CHECK-NEXT: mtvsrd v2, r3 -; CHECK-NEXT: mtvsrd v3, r4 -; CHECK-NEXT: mtvsrd v4, r5 -; CHECK-NEXT: 
mtvsrd v5, r6 -; CHECK-NEXT: mtvsrd v0, r7 -; CHECK-NEXT: mtvsrd v1, r8 -; CHECK-NEXT: vmrghh v2, v3, v2 -; CHECK-NEXT: mtvsrd v3, r9 -; CHECK-NEXT: vmrghh v4, v5, v4 -; CHECK-NEXT: mtvsrd v5, r10 -; CHECK-NEXT: vmrghh v0, v1, v0 -; CHECK-NEXT: vmrghh v3, v5, v3 -; CHECK-NEXT: vmrglw v2, v4, v2 -; CHECK-NEXT: vmrglw v3, v3, v0 -; CHECK-NEXT: xxmrgld v2, v3, v2 +; CHECK-NEXT: slwi r6, r6, 16 +; CHECK-NEXT: slwi r4, r4, 16 +; CHECK-NEXT: slwi r10, r10, 16 +; CHECK-NEXT: slwi r8, r8, 16 +; CHECK-NEXT: or r5, r6, r5 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: or r4, r10, r9 +; CHECK-NEXT: or r6, r8, r7 +; CHECK-NEXT: rldimi r3, r5, 32, 0 +; CHECK-NEXT: rldimi r6, r4, 32, 0 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mtfprd f1, r6 +; CHECK-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test8x24: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: sth r10, -16(r1) -; CHECK-BE-NEXT: sth r9, -32(r1) -; CHECK-BE-NEXT: sth r8, -48(r1) -; CHECK-BE-NEXT: sth r7, -64(r1) -; CHECK-BE-NEXT: sth r6, -80(r1) -; CHECK-BE-NEXT: sth r5, -96(r1) -; CHECK-BE-NEXT: sth r4, -112(r1) -; CHECK-BE-NEXT: sth r3, -128(r1) +; CHECK-BE-NEXT: slwi r9, r9, 16 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: slwi r3, r3, 16 +; CHECK-BE-NEXT: or r9, r9, r10 +; CHECK-BE-NEXT: or r7, r7, r8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: stw r9, -16(r1) +; CHECK-BE-NEXT: stw r7, -32(r1) +; CHECK-BE-NEXT: stw r5, -48(r1) +; CHECK-BE-NEXT: stw r3, -64(r1) ; CHECK-BE-NEXT: addi r3, r1, -16 ; CHECK-BE-NEXT: lxvw4x v2, 0, r3 ; CHECK-BE-NEXT: addi r3, r1, -32 @@ -150,19 +152,7 @@ ; CHECK-BE-NEXT: lxvw4x v4, 0, r3 ; CHECK-BE-NEXT: addi r3, r1, -64 ; CHECK-BE-NEXT: lxvw4x v5, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -80 -; CHECK-BE-NEXT: lxvw4x v0, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -96 -; CHECK-BE-NEXT: lxvw4x v1, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -112 -; CHECK-BE-NEXT: lxvw4x v6, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -128 -; 
CHECK-BE-NEXT: lxvw4x v7, 0, r3 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: vmrghh v3, v5, v4 -; CHECK-BE-NEXT: vmrghh v4, v1, v0 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: vmrghh v5, v7, v6 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr Index: llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -20,12 +20,8 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -33,37 +29,26 @@ ; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, 
vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> @@ -75,86 +60,77 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghh v3, v4, v3 -; CHECK-P8-NEXT: vmrghh v2, v2, v5 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P8-NEXT: mtvsrwz v2, r5 +; CHECK-P8-NEXT: mtvsrwz v3, r6 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; 
CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 +; CHECK-P9-NEXT: mtvsrwz v3, r4 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mtvsrwz v2, r4 +; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 +; 
CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -166,159 +142,134 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f1, v3 ; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs3, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; 
CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: rlwimi r5, r3, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: mffprwz r7, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v3, v3, v4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghh v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrghh v4, v4, v1 -; CHECK-P8-NEXT: vmrglw v3, v4, v5 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r7, r5, 32, 0 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: mffprwz r8, f0 +; CHECK-P8-NEXT: rlwimi r3, r9, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f0, r7 +; CHECK-P8-NEXT: rlwimi r8, r4, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r3, r8, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: 
mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; 
CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: 
xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -329,307 +280,266 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r6, 48 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: lvx v4, r4, r5 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v3, r4, r5 -; CHECK-P8-NEXT: lvx v2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v5 -; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f3, v5 +; CHECK-P8-NEXT: xscvspdpn f1, v3 
+; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f0, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f5 -; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r8, f3 +; CHECK-P8-NEXT: xxswapd vs3, v3 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xscvdpsxws f5, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f8 -; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xxswapd vs0, v4 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd vs5, v2 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: vmrghh v3, v0, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f1, vs5 -; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: 
xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v5, v1 -; CHECK-P8-NEXT: vmrghh v5, v6, v0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f5 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 3 +; CHECK-P8-NEXT: rlwimi r9, r4, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r11, f4 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 3 +; CHECK-P8-NEXT: mffprwz r10, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: mffprwz r12, f2 +; CHECK-P8-NEXT: xxswapd vs2, v2 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: rlwimi r11, r10, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r0, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 1 +; CHECK-P8-NEXT: rlwimi r12, r6, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r11, r9, 32, 0 +; CHECK-P8-NEXT: mffprwz r30, f3 +; CHECK-P8-NEXT: xxswapd vs3, v5 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: rlwimi r30, r0, 16, 0, 15 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: rldimi r30, r12, 32, 0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 
-; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v0, v0, v7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v7, v8, v7 -; CHECK-P8-NEXT: vmrghh v6, v6, v9 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglw v3, v0, v5 -; CHECK-P8-NEXT: vmrglw v4, v1, v4 -; CHECK-P8-NEXT: vmrglw v5, v6, v7 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mffprwz r29, f0 +; CHECK-P8-NEXT: mffprwz r28, f2 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: rlwimi r29, r7, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: rlwimi r4, r28, 16, 0, 15 +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r9, f5 +; CHECK-P8-NEXT: rlwimi r6, r8, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r4, r29, 32, 0 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mtfprd f0, r11 +; CHECK-P8-NEXT: rlwimi r9, r7, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f1, r30 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: rldimi r9, r6, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mtfprd f3, r9 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd v3, vs2, vs3 +; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-P9-NEXT: xscvspdpn f3, 
vs2 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 16(r4) +; CHECK-P9-NEXT: mffprwz r4, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-P9-NEXT: rlwimi r4, r5, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs3, vs3, 1 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: rlwimi r5, r6, 16, 0, 15 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvdpsxws f4, f5 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs1 +; CHECK-P9-NEXT: mffprwz r7, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r6, f2 +; 
CHECK-P9-NEXT: mffprwz r8, f3 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: rlwimi r8, r6, 16, 0, 15 +; CHECK-P9-NEXT: mffprwz r6, f4 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r8, r7, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtvsrdd vs2, r8, r5 +; CHECK-P9-NEXT: stxv vs2, 0(r3) +; CHECK-P9-NEXT: mffprwz r7, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs3 -; CHECK-P9-NEXT: vmrghh v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r7, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mtvsrd v1, r5 -; CHECK-P9-NEXT: vmrghh v0, v1, v0 -; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd 
v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r6, r7 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r5 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 48(r4) +; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-BE-NEXT: xscvspdpn f3, vs2 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: 
xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: lxv vs3, 0(r4) +; CHECK-BE-NEXT: mffprwz r4, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f5, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-BE-NEXT: rlwimi r4, r5, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs3, vs3, 1 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: rlwimi r5, r6, 16, 0, 15 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f5 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs1 +; CHECK-BE-NEXT: mffprwz r7, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs3 +; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: mffprwz r8, f3 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xxsldwi vs3, 
vs1, vs1, 1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: rlwimi r8, r6, 16, 0, 15 +; CHECK-BE-NEXT: mffprwz r6, f4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r8, r7, 32, 0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtvsrdd vs2, r8, r5 +; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: mffprwz r7, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs3 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-BE-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r7, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v4, v0, v5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrwz v6, r5 -; CHECK-BE-NEXT: vperm v1, v6, v1, v2 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 
3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) -; CHECK-BE-NEXT: mtvsrwz v3, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: vperm v3, v5, v3, v2 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghw v3, v3, v1 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xscvspdpn f0, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r6, r7 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -649,13 +559,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: @@ -667,33 +574,24 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: 
xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> @@ -705,86 +603,83 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, 
f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghh v3, v4, v3 -; CHECK-P8-NEXT: vmrghh v2, v2, v5 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 16 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: or r3, r4, r6 +; CHECK-P8-NEXT: mtvsrwz v3, r3 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mtvsrwz v3, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: xxswapd 
vs0, v2 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -796,159 +691,146 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 ; 
CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f1, v3 ; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, v2 +; CHECK-P8-NEXT: xxswapd vs4, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: slwi r4, r4, 16 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r7, f4 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: mffprwz r10, f5 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: or r3, 
r3, r5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v3, v3, v4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghh v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrghh v4, v4, v1 -; CHECK-P8-NEXT: vmrglw v3, v4, v5 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: or r5, r7, r6 +; CHECK-P8-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: mffprwz r8, f0 +; CHECK-P8-NEXT: slwi r6, r9, 16 +; CHECK-P8-NEXT: mtfprd f0, r5 +; CHECK-P8-NEXT: or r4, r4, r8 +; CHECK-P8-NEXT: or r6, r6, r10 +; CHECK-P8-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, 
vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r4, r5, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r3, r5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxvx 
v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r4, r5, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: 
xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r3, r5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -959,307 +841,292 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r6, 48 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: lvx v4, r4, r5 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v3, r4, r5 -; CHECK-P8-NEXT: lvx v2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v5 -; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f3, v5 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: xxswapd vs5, v3 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: xscvspdpn f0, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 
+; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f5 -; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r8, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 3 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs4 +; CHECK-P8-NEXT: xxswapd vs4, v4 +; CHECK-P8-NEXT: slwi r8, r8, 16 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xscvdpsxws f5, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f8 -; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f5 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 3 +; CHECK-P8-NEXT: or r4, r4, r9 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd vs5, v2 +; CHECK-P8-NEXT: mffprwz r11, f4 +; CHECK-P8-NEXT: xxswapd vs4, v2 +; CHECK-P8-NEXT: mffprwz r10, f0 +; CHECK-P8-NEXT: 
xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: slwi r11, r11, 16 +; CHECK-P8-NEXT: mffprwz r0, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 1 +; CHECK-P8-NEXT: or r9, r11, r10 +; CHECK-P8-NEXT: mffprwz r30, f3 +; CHECK-P8-NEXT: xxswapd vs3, v5 +; CHECK-P8-NEXT: rldimi r9, r4, 32, 0 +; CHECK-P8-NEXT: mffprwz r12, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: slwi r30, r30, 16 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: or r6, r6, r12 +; CHECK-P8-NEXT: or r10, r30, r0 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: rldimi r10, r6, 32, 0 +; CHECK-P8-NEXT: slwi r6, r7, 16 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: vmrghh v3, v0, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f1, vs5 -; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v5, v1 -; CHECK-P8-NEXT: vmrghh v5, v6, v0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: 
mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mffprwz r7, f5 +; CHECK-P8-NEXT: mffprwz r27, f4 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v0, v0, v7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v7, v8, v7 -; CHECK-P8-NEXT: vmrghh v6, v6, v9 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglw v3, v0, v5 -; CHECK-P8-NEXT: vmrglw v4, v1, v4 -; CHECK-P8-NEXT: vmrglw v5, v6, v7 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: mffprwz r29, f0 +; CHECK-P8-NEXT: slwi r12, r27, 16 +; CHECK-P8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r28, f2 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: mffprwz r11, f1 +; CHECK-P8-NEXT: or r6, r6, r29 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: or r12, r12, r28 +; CHECK-P8-NEXT: mtfprd f0, r9 +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: or r8, r8, r11 +; CHECK-P8-NEXT: rldimi r12, r6, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r10 +; CHECK-P8-NEXT: rldimi r4, r8, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r12 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd v3, vs2, vs3 +; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs2, 0(r4) ; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: 
xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvspdpn f3, vs2 +; CHECK-P9-NEXT: xscvspdpn f4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvdpsxws f4, f5 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: lxv vs3, 32(r4) +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1 +; CHECK-P9-NEXT: or r5, r5, r7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-P9-NEXT: or r4, r4, r7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: or r6, r6, r7 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: 
xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r8, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: slwi r8, r8, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs3 -; CHECK-P9-NEXT: vmrghh v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: or r7, r8, r7 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r7, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r8, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs3 +; CHECK-P9-NEXT: mffprwz r9, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs3, vs3, 3 +; CHECK-P9-NEXT: slwi r9, r9, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: or r8, r9, r8 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r8, r4, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: mffprwz r10, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 1 +; CHECK-P9-NEXT: mffprwz r9, f1 +; CHECK-P9-NEXT: mtvsrdd vs1, r8, r7 +; CHECK-P9-NEXT: slwi r10, r10, 16 +; CHECK-P9-NEXT: stxv vs1, 0(r3) +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r9, r10, r9 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; 
CHECK-P9-NEXT: mtvsrd v1, r5 -; CHECK-P9-NEXT: vmrghh v0, v1, v0 +; CHECK-P9-NEXT: rldimi r9, r6, 32, 0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: xscvspdpn f2, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 3 +; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r4, r5, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r5, r9 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha ; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; 
CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r5 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f5, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: xscvspdpn f3, vs2 +; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f5 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: lxv vs3, 48(r4) +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1 +; CHECK-BE-NEXT: or r5, r5, r7 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-BE-NEXT: or r4, r4, r7 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 3 +; CHECK-BE-NEXT: 
xxswapd vs2, vs2 +; CHECK-BE-NEXT: or r6, r6, r7 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r8, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: slwi r8, r8, 16 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs3 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: or r7, r8, r7 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r7, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r8, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs3 +; CHECK-BE-NEXT: mffprwz r9, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs3, vs3, 3 +; CHECK-BE-NEXT: slwi r9, r9, 16 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: or r8, r9, r8 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r8, r4, 32, 0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v4, v0, v5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: mffprwz r10, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r9, f1 +; CHECK-BE-NEXT: mtvsrdd vs1, r8, r7 +; CHECK-BE-NEXT: slwi r10, r10, 16 +; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: 
xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r9, r10, r9 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrwz v6, r5 -; CHECK-BE-NEXT: vperm v1, v6, v1, v2 +; CHECK-BE-NEXT: rldimi r9, r6, 32, 0 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) -; CHECK-BE-NEXT: mtvsrwz v3, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: vperm v3, v5, v3, v2 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghw v3, v3, v1 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xscvspdpn f0, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 +; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r4, r5, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r5, r9 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: Index: 
llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -19,15 +19,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghb v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 -; CHECK-P8-NEXT: sth r3, -2(r1) +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P8-NEXT: sth r4, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; @@ -36,40 +31,21 @@ ; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P9-NEXT: sth r4, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: 
xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -82,89 +58,68 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: vmrghb v2, v2, v5 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: 
test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r3, r4, 8, 16, 23 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: 
vperm v4, v5, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r3, r4, 8, 16, 23 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xscvspdpn f0, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-BE-NEXT: blr entry: %0 = fptoui <4 x float> %a to <4 x i8> @@ -175,54 +130,48 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: xxswapd vs4, v3 +; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xscvspdpn f1, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; 
CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 3 +; CHECK-P8-NEXT: mffprwz r7, f4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: rlwimi r5, r4, 8, 16, 23 +; CHECK-P8-NEXT: rlwimi r5, r6, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r6, f5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: rlwimi r5, r3, 24, 0, 7 +; CHECK-P8-NEXT: mtvsrwz v2, r5 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mffprwz r8, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghb v3, v3, v4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: rlwimi r8, r7, 8, 16, 23 +; CHECK-P8-NEXT: rlwimi r8, r4, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r8, r6, 24, 0, 7 +; CHECK-P8-NEXT: mtvsrwz v3, r8 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -231,51 +180,45 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 
0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: mtvsrwz v2, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; 
CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: mtvsrwz v3, r4 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -283,54 +226,45 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 
-; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -343,305 +277,256 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: lvx v3, r3, r4 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: lvx v3, r3, r5 +; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 ; CHECK-P8-NEXT: lvx v2, r3, r5 -; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 -; CHECK-P8-NEXT: xxswapd vs2, v4 -; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f1, v4 ; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xxsldwi vs6, v3, 
v3, 3 +; CHECK-P8-NEXT: xxswapd vs5, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs7, v3 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 3 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: mffprwz r5, f0 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r5, f2 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f3, vs5 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 1 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: rlwimi r7, r5, 8, 16, 23 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mffprwz r8, f4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: rlwimi r7, r8, 16, 8, 15 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v0, r3, r4 +; CHECK-P8-NEXT: rlwimi r7, r4, 24, 0, 7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvspdpn f4, v2 +; 
CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 3 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: xxswapd vs4, v0 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: mffprwz r11, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: mffprwz r12, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 1 +; CHECK-P8-NEXT: rlwimi r3, r9, 8, 16, 23 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mtvsrd v7, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v0 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f6, f9 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: rlwimi r12, r11, 8, 16, 23 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: mffprwz r10, f5 +; CHECK-P8-NEXT: xscvspdpn f5, v3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: rlwimi r3, r10, 16, 8, 15 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: rlwimi r3, r6, 24, 0, 7 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: rldimi r3, r7, 32, 0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mffprwz r0, f1 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: mffprwz r9, f0 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: rlwimi r5, r0, 8, 16, 23 +; CHECK-P8-NEXT: rlwimi r12, r9, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r9, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb 
v4, v5, v4 -; CHECK-P8-NEXT: mtvsrd v5, r5 -; CHECK-P8-NEXT: vmrghb v0, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: vmrghb v5, v5, v7 -; CHECK-P8-NEXT: vmrghb v1, v1, v6 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v7, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mtvsrd v9, r3 -; CHECK-P8-NEXT: vmrghb v7, v8, v7 -; CHECK-P8-NEXT: vmrghb v6, v6, v9 -; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v6, v7 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: rlwimi r5, r8, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r12, r9, 24, 0, 7 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: rlwimi r5, r4, 24, 0, 7 +; CHECK-P8-NEXT: rldimi r5, r12, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r5 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, 
vs3, 1 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: rlwimi r5, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: rlwimi r5, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; 
CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v5, v4 +; CHECK-P9-NEXT: rlwimi r6, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; 
CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r6, r3, 24, 0, 7 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: 
mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvspdpn f3, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: rlwimi r5, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: rlwimi r5, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 
-; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 +; CHECK-BE-NEXT: rlwimi r6, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: rlwimi r6, r3, 24, 0, 7 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -659,14 +544,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghb v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 8 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: or r3, r3, r4 ; 
CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr @@ -680,36 +561,18 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 8 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: sth r3, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -722,89 +585,77 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: 
xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: vmrghb v2, v2, v5 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 8 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: or r3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; 
CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: slwi r4, r4, 8 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxvx v3, 0, r3 +; CHECK-BE-NEXT: xscvspdpn f0, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: slwi r4, r4, 8 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or 
r3, r3, r4 ; CHECK-BE-NEXT: blr entry: %0 = fptosi <4 x float> %a to <4 x i8> @@ -815,54 +666,54 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f1, v3 ; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs3, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: slwi r4, r4, 24 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 +; 
CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: slwi r5, r5, 16 +; CHECK-P8-NEXT: or r3, r3, r5 +; CHECK-P8-NEXT: slwi r6, r6, 8 +; CHECK-P8-NEXT: mffprwz r5, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghb v3, v3, v4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: or r3, r3, r6 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: or r3, r3, r5 +; CHECK-P8-NEXT: mffprwz r6, f5 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: mffprwz r8, f1 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: slwi r7, r8, 8 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -871,51 +722,51 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi 
vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrwz v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; 
CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtvsrwz v3, r3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -923,54 +774,51 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; 
CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -983,305 +831,296 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v4, 0, r3 -; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: lvx v5, 0, r3 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: lvx v2, r3, r5 -; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 -; CHECK-P8-NEXT: xxswapd vs2, v4 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lvx v4, r3, r4 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lvx v3, r3, r5 +; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f3, v5 +; CHECK-P8-NEXT: xscvspdpn f0, v4 ; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f1, v4 -; 
CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: xxswapd vs3, v3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxswapd vs0, v4 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 1 +; CHECK-P8-NEXT: slwi r6, r6, 24 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: xscvspdpn f1, vs4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 +; CHECK-P8-NEXT: slwi r5, r5, 24 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: slwi r4, r4, 24 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: mffprwz r12, f5 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r5, f2 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: mffprwz r8, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: mffprwz r10, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: mffprwz r11, f3 +; 
CHECK-P8-NEXT: xxswapd vs3, v5 +; CHECK-P8-NEXT: or r3, r3, r7 +; CHECK-P8-NEXT: slwi r8, r8, 8 +; CHECK-P8-NEXT: mffprwz r9, f4 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 3 +; CHECK-P8-NEXT: slwi r10, r10, 16 +; CHECK-P8-NEXT: or r3, r3, r8 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v0, r3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: xxswapd vs4, v0 +; CHECK-P8-NEXT: or r4, r4, r10 +; CHECK-P8-NEXT: slwi r11, r11, 8 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: or r4, r4, r11 +; CHECK-P8-NEXT: or r3, r3, r9 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mtvsrd v7, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v0 +; CHECK-P8-NEXT: or r4, r4, r12 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 
-; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v4, v5, v4 -; CHECK-P8-NEXT: mtvsrd v5, r5 -; CHECK-P8-NEXT: vmrghb v0, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: vmrghb v5, v5, v7 -; CHECK-P8-NEXT: vmrghb v1, v1, v6 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v7, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mtvsrd v9, r3 -; CHECK-P8-NEXT: vmrghb v7, v8, v7 -; CHECK-P8-NEXT: vmrghb v6, v6, v9 -; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v6, v7 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: mffprwz r8, f5 +; CHECK-P8-NEXT: mffprwz r0, f0 +; CHECK-P8-NEXT: mffprwz r29, f2 +; CHECK-P8-NEXT: mffprwz r30, f1 +; CHECK-P8-NEXT: slwi r10, r0, 16 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: slwi r0, r29, 16 +; CHECK-P8-NEXT: or r5, r5, r10 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r10, f4 +; CHECK-P8-NEXT: or r6, r6, r0 +; CHECK-P8-NEXT: slwi r11, r30, 8 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: slwi r7, r7, 8 +; CHECK-P8-NEXT: or r5, r5, r11 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: or r6, r6, r7 +; CHECK-P8-NEXT: or r5, r5, r10 +; CHECK-P8-NEXT: or r6, r6, r8 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; 
CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xscvspdpn f4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xscvspdpn f3, vs2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-P9-NEXT: slwi r4, r4, 24 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 
-; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r3, r3, r5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; 
CHECK-P9-NEXT: slwi r5, r5, 24 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: slwi r6, r6, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r5, r5, r6 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: slwi r6, r6, 8 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs3 -; 
CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs3 +; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xscvspdpn f3, vs2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-BE-NEXT: slwi r4, r4, 24 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvspdpn f3, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: 
mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r3, r3, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: slwi r5, r5, 24 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r6, f1 ; 
CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: slwi r6, r6, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r5, r5, r6 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: slwi r6, r6, 8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -15,44 +15,29 @@ ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; 
CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-BE-NEXT: blr entry: %0 = fptoui <2 x double> %a to <2 x i16> @@ -64,25 +49,23 @@ ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: vmrghh v3, v5, v3 -; CHECK-P8-NEXT: vmrglw v2, 
v3, v2 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: rlwimi r4, r6, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 0, 15 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -95,19 +78,17 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-P9-NEXT: mtvsrwz v2, r4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mtvsrwz v3, r4 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -115,26 +96,21 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: 
mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -147,137 +123,112 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f7, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f7, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v2, v0, v2 -; CHECK-P8-NEXT: vmrghh v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, 
r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghh v4, v0, v4 -; CHECK-P8-NEXT: vmrghh v5, v1, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r7, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r8, f1 +; CHECK-P8-NEXT: mffprwz r9, f2 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: rlwimi r4, r8, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r5, r9, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: rlwimi r6, r7, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f2 -; 
CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: 
vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -288,263 +239,222 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 +; CHECK-P8-NEXT: li r5, 80 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: lxvd2x vs11, 0, r4 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 112 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: xscvdpsxws v3, 
f11 +; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r5 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 -; CHECK-P8-NEXT: li r6, 112 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: lxvd2x vs9, r4, r5 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f9, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f11, f5 +; CHECK-P8-NEXT: xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f12, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: mffprwz r7, f4 -; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f13, f10 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f6, f4 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 +; CHECK-P8-NEXT: xscvdpsxws v2, f9 +; CHECK-P8-NEXT: xxswapd vs9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxswapd vs6, vs10 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs4 -; CHECK-P8-NEXT: mtvsrd v2, r7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws 
f7, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f6 -; CHECK-P8-NEXT: vmrghh v2, v8, v2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v3, v9, v3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghh v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: vmrghh v0, v8, v0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v1, v9, v1 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v6, v8, v6 -; CHECK-P8-NEXT: vmrghh v7, v9, v7 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: mffprwz r0, f0 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: mffprwz r30, f1 +; CHECK-P8-NEXT: rlwimi r4, r0, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r9, f12 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: rlwimi r6, r30, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P8-NEXT: mffprwz r8, f10 +; CHECK-P8-NEXT: rlwimi r9, r0, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r10, f13 +; CHECK-P8-NEXT: mfvsrwz r11, v2 +; CHECK-P8-NEXT: mfvsrwz r12, v3 +; CHECK-P8-NEXT: mffprwz r29, f2 +; CHECK-P8-NEXT: mffprwz r28, f3 +; CHECK-P8-NEXT: mffprwz r30, f7 +; CHECK-P8-NEXT: rlwimi r7, r29, 16, 0, 15 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r0, f9 +; CHECK-P8-NEXT: rlwimi r8, r28, 16, 0, 15 +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; 
CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: rlwimi r10, r30, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r8, r7, 32, 0 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: rlwimi r11, r0, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r10, r9, 32, 0 +; CHECK-P8-NEXT: mtfprd f0, r6 +; CHECK-P8-NEXT: rlwimi r12, r4, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f1, r8 +; CHECK-P8-NEXT: rldimi r12, r11, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r10 +; CHECK-P8-NEXT: mtfprd f3, r12 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd v3, vs2, vs3 +; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r4) -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: lxv vs0, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xscvdpsxws f5, f2 -; CHECK-P9-NEXT: xscvdpsxws f6, f1 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f7, f0 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: lxv vs7, 16(r4) +; CHECK-P9-NEXT: lxv vs6, 0(r4) +; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: lxv vs4, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 +; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: lxv vs2, 64(r4) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: mffprwz r4, f8 ; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) 
-; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: vmrghh v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f7, f6 +; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: mffprwz r6, f6 +; CHECK-P9-NEXT: xscvdpsxws f6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: mffprwz r7, f5 +; CHECK-P9-NEXT: xscvdpsxws f5, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r4, f5 +; CHECK-P9-NEXT: mffprwz r8, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: vmrghh v3, v3, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: rlwimi r8, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: rldimi r8, r7, 32, 0 ; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: vmrglw v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 +; CHECK-P9-NEXT: xscvdpsxws f4, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: stxv vs4, 0(r3) -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mffprwz r9, f3 +; CHECK-P9-NEXT: mtvsrdd vs3, r8, r6 +; CHECK-P9-NEXT: rlwimi r9, r4, 
16, 0, 15 +; CHECK-P9-NEXT: stxv vs3, 0(r3) +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: rldimi r5, r9, 32, 0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 +; CHECK-P9-NEXT: mffprwz r7, f0 +; CHECK-P9-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r7, r6, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r7, r5 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r4) -; CHECK-BE-NEXT: lxv vs2, 32(r4) -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r5 -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xscvdpsxws f6, f1 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f0 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: 
mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: lxv vs7, 32(r4) +; CHECK-BE-NEXT: lxv vs6, 48(r4) +; CHECK-BE-NEXT: lxv vs5, 0(r4) +; CHECK-BE-NEXT: lxv vs4, 16(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs3, 96(r4) +; CHECK-BE-NEXT: lxv vs0, 80(r4) +; CHECK-BE-NEXT: lxv vs1, 64(r4) +; CHECK-BE-NEXT: lxv vs2, 112(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: mffprwz r4, f8 ; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: vperm v3, v3, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 80(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mffprwz r6, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mffprwz r7, f5 +; CHECK-BE-NEXT: xscvdpsxws f5, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs4 +; CHECK-BE-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: mffprwz r8, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: rlwimi r8, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: 
vmrghw v3, v4, v3 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: rldimi r8, r7, 32, 0 ; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: vmrghw v5, v0, v5 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 +; CHECK-BE-NEXT: xscvdpsxws f4, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 -; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r9, f3 +; CHECK-BE-NEXT: mtvsrdd vs3, r8, r6 +; CHECK-BE-NEXT: rlwimi r9, r4, 16, 0, 15 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: rldimi r5, r9, 32, 0 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: mffprwz r7, f0 +; CHECK-BE-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r7, r6, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r7, r5 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -561,43 +471,31 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; 
CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: slwi r3, r3, 16 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: xscvdpsxws f0, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: blr entry: %0 = fptosi <2 x double> %a to <2 x i16> @@ -609,25 +507,25 @@ ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: 
xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r6, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: vmrghh v3, v5, v3 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: or r3, r4, r3 +; CHECK-P8-NEXT: slwi r4, r5, 16 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -636,23 +534,23 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xscvdpsxws f2, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: mffprwz r3, f1 -; 
CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mtvsrwz v3, r3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -660,26 +558,23 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xscvdpsxws f2, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -692,137 +587,124 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 
; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f7, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f7, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v2, v0, v2 -; CHECK-P8-NEXT: vmrghh v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghh v4, v0, v4 -; CHECK-P8-NEXT: vmrghh v5, v1, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: mffprwz r6, f0 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r3, r6, r3 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: or r4, r7, r4 +; CHECK-P8-NEXT: slwi r6, r8, 16 +; CHECK-P8-NEXT: mffprwz r10, f7 +; CHECK-P8-NEXT: slwi r7, r9, 16 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: or r5, r6, r5 +; 
CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: or r6, r7, r10 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvdpsxws f2, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: or r4, r5, r4 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws 
f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: or r3, r5, r3 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xxswapd vs4, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz 
v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvdpsxws f2, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: or r3, r5, r3 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -833,263 +715,248 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 +; CHECK-P8-NEXT: li r5, 80 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: lxvd2x vs11, 0, r4 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; 
CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 112 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: xscvdpsxws v3, f11 +; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 -; CHECK-P8-NEXT: li r6, 112 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r5 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: lxvd2x vs9, r4, r5 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f9, f3 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f11, f5 -; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f12, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: mffprwz r7, f4 -; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f13, f10 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f6, f4 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xxswapd vs5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 +; CHECK-P8-NEXT: xscvdpsxws v2, f9 +; CHECK-P8-NEXT: xxswapd vs9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; 
CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxswapd vs6, vs10 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs4 -; CHECK-P8-NEXT: mtvsrd v2, r7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f6 -; CHECK-P8-NEXT: vmrghh v2, v8, v2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v3, v9, v3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghh v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f11, f11 +; CHECK-P8-NEXT: mffprwz r12, f0 +; CHECK-P8-NEXT: mffprwz r0, f1 ; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: vmrghh v0, v8, v0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v1, v9, v1 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v6, v8, v6 -; CHECK-P8-NEXT: vmrghh v7, v9, v7 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 -; CHECK-P8-NEXT: xxmrgld v2, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: slwi r12, r12, 16 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: slwi r0, r0, 16 +; CHECK-P8-NEXT: mffprwz r30, f2 +; CHECK-P8-NEXT: or r4, r12, r4 +; CHECK-P8-NEXT: mffprwz r29, f3 +; CHECK-P8-NEXT: or r6, r0, r6 +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: slwi r30, r30, 16 +; CHECK-P8-NEXT: rldimi r6, r4, 32, 0 +; 
CHECK-P8-NEXT: mffprwz r8, f10 +; CHECK-P8-NEXT: slwi r29, r29, 16 +; CHECK-P8-NEXT: mffprwz r28, f5 +; CHECK-P8-NEXT: or r7, r30, r7 +; CHECK-P8-NEXT: mffprwz r27, f7 +; CHECK-P8-NEXT: or r8, r29, r8 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r12, f9 +; CHECK-P8-NEXT: rldimi r8, r7, 32, 0 +; CHECK-P8-NEXT: slwi r0, r28, 16 +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: slwi r30, r27, 16 +; CHECK-P8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r9, f12 +; CHECK-P8-NEXT: mffprwz r10, f13 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: mfvsrwz r11, v2 +; CHECK-P8-NEXT: or r9, r0, r9 +; CHECK-P8-NEXT: mfvsrwz r7, v3 +; CHECK-P8-NEXT: or r10, r30, r10 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mtfprd f0, r6 +; CHECK-P8-NEXT: slwi r6, r12, 16 +; CHECK-P8-NEXT: rldimi r10, r9, 32, 0 +; CHECK-P8-NEXT: or r6, r6, r11 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: mtfprd f1, r8 +; CHECK-P8-NEXT: rldimi r4, r6, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r10 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd v3, vs2, vs3 +; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r4) -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: lxv vs0, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xscvdpsxws f5, f2 -; CHECK-P9-NEXT: xscvdpsxws f6, f1 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f7, f0 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: 
mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: lxv vs7, 16(r4) +; CHECK-P9-NEXT: lxv vs6, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: xxswapd vs8, vs7 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs2, 64(r4) +; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs4, 32(r4) +; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f8 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs6 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: or r4, r4, r5 ; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: vmrghh v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 96(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: vmrghh v3, v3, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: xxswapd vs6, vs5 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: mffprwz r6, f7 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: mffprwz r7, f6 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: or r6, r6, r7 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; 
CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 -; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: vmrglw v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: slwi r7, r7, 16 +; CHECK-P9-NEXT: mffprwz r8, f5 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: or r7, r7, r8 +; CHECK-P9-NEXT: mffprwz r8, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: stxv vs4, 0(r3) +; CHECK-P9-NEXT: rldimi r7, r6, 32, 0 +; CHECK-P9-NEXT: slwi r8, r8, 16 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r9, f4 +; CHECK-P9-NEXT: or r8, r8, r9 ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: xscvdpsxws f2, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd vs3, r7, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: stxv vs3, 0(r3) +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: or r4, r5, r4 +; CHECK-P9-NEXT: rldimi r4, r8, 32, 0 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 +; 
CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: mffprwz r7, f0 +; CHECK-P9-NEXT: slwi r7, r7, 16 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: or r6, r7, r6 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r6, r4 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r4) -; CHECK-BE-NEXT: lxv vs2, 32(r4) -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r5 -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xscvdpsxws f6, f1 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f0 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtvsrwz v4, r5 +; CHECK-BE-NEXT: lxv vs7, 32(r4) +; CHECK-BE-NEXT: lxv vs6, 48(r4) +; CHECK-BE-NEXT: lxv vs0, 80(r4) +; CHECK-BE-NEXT: lxv vs1, 64(r4) +; CHECK-BE-NEXT: xxswapd vs8, vs7 +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: lxv vs2, 112(r4) +; CHECK-BE-NEXT: lxv vs3, 96(r4) +; CHECK-BE-NEXT: lxv vs4, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f8 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs6 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: mffprwz r5, f8 +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: or r4, r4, r5 ; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, 
f7 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: vperm v3, v3, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 80(r4) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: xxswapd vs6, vs5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: mffprwz r6, f7 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: mffprwz r7, f6 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: or r6, r6, r7 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 -; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: vmrghw v5, v0, v5 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 -; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: mffprwz r8, f5 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: or r7, r7, r8 +; CHECK-BE-NEXT: mffprwz r8, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: rldimi r7, r6, 32, 0 +; CHECK-BE-NEXT: slwi r8, r8, 16 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; 
CHECK-BE-NEXT: mffprwz r9, f4 +; CHECK-BE-NEXT: or r8, r8, r9 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xscvdpsxws f2, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mtvsrdd vs3, r7, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: rldimi r4, r8, 32, 0 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: mffprwz r7, f0 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: or r6, r7, r6 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r6, r4 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -16,14 +16,9 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; 
CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghb v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 -; CHECK-P8-NEXT: sth r3, -2(r1) +; CHECK-P8-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P8-NEXT: sth r4, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; @@ -32,33 +27,17 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P9-NEXT: sth r4, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -70,28 +49,22 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 
-; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs0 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: vmrghb v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -101,50 +74,35 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r3, r4, 24, 0, 7 ; 
CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, <4 x double>* %0, align 32 @@ -156,48 +114,42 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f5, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 +; 
CHECK-P8-NEXT: xscvdpsxws f5, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f7, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: rlwimi r3, r7, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: rlwimi r5, r8, 8, 16, 23 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v2, v0, v2 -; CHECK-P8-NEXT: vmrghb v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v0, v4 -; CHECK-P8-NEXT: vmrghb v5, v1, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: rlwimi r5, r6, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r7, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r5, r4, 24, 0, 7 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: mtvsrwz v3, r5 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -206,90 +158,75 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: 
xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: mtvsrwz v2, r4 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: mtvsrwz v3, r4 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, 
.LCPI2_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -302,261 +239,212 @@ define <16 x i8> 
@test16elt(<16 x double>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 ; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: li r5, 96 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lxvd2x vs5, r3, r5 +; CHECK-P8-NEXT: li r5, 64 ; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: li r4, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: lxvd2x vs5, r3, r4 -; CHECK-P8-NEXT: li r4, 80 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs9, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs7, r3, r4 -; CHECK-P8-NEXT: li r4, 96 +; CHECK-P8-NEXT: li r4, 80 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: lxvd2x vs11, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; 
CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mffprwz r11, f0 +; CHECK-P8-NEXT: mffprwz r5, f8 +; CHECK-P8-NEXT: mffprwz r7, f12 +; CHECK-P8-NEXT: rlwimi r3, r11, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r12, f2 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: rlwimi r5, r12, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: rlwimi r7, r0, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r8, f13 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P8-NEXT: mfvsrwz r9, v2 +; CHECK-P8-NEXT: rlwimi r5, r6, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r11, f9 +; CHECK-P8-NEXT: rlwimi r7, r8, 16, 8, 15 +; CHECK-P8-NEXT: mfvsrwz r10, v3 +; CHECK-P8-NEXT: mffprwz r12, f1 +; CHECK-P8-NEXT: rlwimi r9, r11, 8, 16, 23 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v8, v4 -; CHECK-P8-NEXT: vmrghb v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghb v0, v8, v0 -; CHECK-P8-NEXT: vmrghb v1, v9, v1 -; 
CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrghb v6, v8, v6 -; CHECK-P8-NEXT: vmrghb v2, v9, v2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v3, v8, v3 -; CHECK-P8-NEXT: vmrghb v7, v9, v7 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v2, v2, v6 -; CHECK-P8-NEXT: vmrglh v3, v7, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: xxmrgld v2, v2, v4 +; CHECK-P8-NEXT: rlwimi r9, r10, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r12, 24, 0, 7 +; CHECK-P8-NEXT: mffprwz r8, f11 +; CHECK-P8-NEXT: rlwimi r5, r4, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r7, r6, 24, 0, 7 +; CHECK-P8-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P8-NEXT: rlwimi r9, r8, 24, 0, 7 +; CHECK-P8-NEXT: mtfprd f0, r5 +; CHECK-P8-NEXT: rldimi r9, r7, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r9 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: lxv vs7, 32(r3) +; CHECK-P9-NEXT: lxv vs6, 48(r3) +; CHECK-P9-NEXT: lxv vs5, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 80(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: lxv vs2, 80(r3) -; CHECK-P9-NEXT: lxv vs3, 64(r3) -; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 64(r3) +; CHECK-P9-NEXT: lxv vs2, 112(r3) +; CHECK-P9-NEXT: lxv vs3, 96(r3) +; CHECK-P9-NEXT: lxv vs4, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: xscvdpsxws f7, f6 -; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: 
mffprwz r4, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: rlwimi r5, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; 
CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: rlwimi r6, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r6, r3, 24, 0, 7 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs7, 80(r3) +; CHECK-BE-NEXT: lxv vs6, 64(r3) +; CHECK-BE-NEXT: lxv vs5, 112(r3) +; CHECK-BE-NEXT: lxv vs0, 32(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs1, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: lxv vs3, 16(r3) +; CHECK-BE-NEXT: lxv vs4, 96(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; 
CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: xscvdpsxws f7, f6 -; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs6 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: xscvdpsxws f5, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: rlwimi r5, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: rlwimi r5, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: 
xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: rlwimi r6, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: rlwimi r6, r3, 24, 0, 7 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 @@ -571,49 +459,30 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: slwi r3, r3, 8 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghb v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; ; 
CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: xscvdpsxws f0, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: sth r3, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -626,80 +495,68 @@ ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: 
xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: slwi r4, r4, 16 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: vmrghb v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 8 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: or r3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 16(r3) +; CHECK-P9-NEXT: lxv vs1, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; 
CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, <4 x double>* %0, align 32 @@ -712,16 +569,16 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs3, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 ; 
CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xscvdpsxws f7, f3 @@ -731,120 +588,117 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v2, v0, v2 -; CHECK-P8-NEXT: vmrghb v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v0, v4 -; CHECK-P8-NEXT: vmrghb v5, v1, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: slwi r5, r5, 24 +; CHECK-P8-NEXT: mffprwz r8, f3 +; CHECK-P8-NEXT: or r3, r5, r3 +; CHECK-P8-NEXT: slwi r6, r6, 8 +; CHECK-P8-NEXT: slwi r7, r7, 24 +; CHECK-P8-NEXT: mffprwz r5, f5 +; CHECK-P8-NEXT: or r3, r3, r6 +; CHECK-P8-NEXT: or r4, r7, r4 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: slwi r7, r8, 8 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: or r3, r3, r5 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: 
lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: mtvsrwz v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtvsrwz v3, r3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; 
CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz 
r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -857,261 +711,252 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: li r5, 96 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: li r4, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs7, r3, r5 +; CHECK-P8-NEXT: li r5, 64 ; CHECK-P8-NEXT: lxvd2x vs5, r3, r4 ; CHECK-P8-NEXT: li r4, 80 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: lxvd2x vs7, r3, r4 -; CHECK-P8-NEXT: li r4, 96 -; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 -; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs11, r3, r5 ; CHECK-P8-NEXT: xscvdpsxws f10, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: lxvd2x vs11, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: 
xscvdpsxws f8, f2 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v8, v4 -; CHECK-P8-NEXT: vmrghb v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghb v0, v8, v0 -; CHECK-P8-NEXT: vmrghb v1, v9, v1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrghb v6, v8, v6 -; 
CHECK-P8-NEXT: vmrghb v2, v9, v2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v3, v8, v3 -; CHECK-P8-NEXT: vmrghb v7, v9, v7 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v2, v2, v6 -; CHECK-P8-NEXT: vmrglh v3, v7, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: xxmrgld v2, v2, v4 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mffprwz r9, f0 +; CHECK-P8-NEXT: mffprwz r5, f8 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r11, f2 +; CHECK-P8-NEXT: slwi r9, r9, 24 +; CHECK-P8-NEXT: mffprwz r7, f12 +; CHECK-P8-NEXT: slwi r5, r5, 16 +; CHECK-P8-NEXT: or r3, r9, r3 +; CHECK-P8-NEXT: mfvsrwz r8, v2 +; CHECK-P8-NEXT: slwi r11, r11, 24 +; CHECK-P8-NEXT: mffprwz r10, f1 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r5, r11, r5 +; CHECK-P8-NEXT: mffprwz r12, f3 +; CHECK-P8-NEXT: slwi r8, r8, 16 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: slwi r10, r10, 8 +; CHECK-P8-NEXT: mffprwz r29, f9 +; CHECK-P8-NEXT: slwi r12, r12, 8 +; CHECK-P8-NEXT: or r3, r3, r10 +; CHECK-P8-NEXT: mffprwz r30, f7 +; CHECK-P8-NEXT: slwi r11, r0, 24 +; CHECK-P8-NEXT: or r5, r5, r12 +; CHECK-P8-NEXT: mffprwz r9, f11 +; CHECK-P8-NEXT: slwi r0, r29, 24 +; CHECK-P8-NEXT: or r7, r11, r7 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: or r8, r0, r8 +; CHECK-P8-NEXT: slwi r12, r30, 8 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: slwi r9, r9, 8 +; CHECK-P8-NEXT: or r7, r7, r12 +; CHECK-P8-NEXT: mffprwz r11, f13 +; CHECK-P8-NEXT: or r8, r8, r9 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: mfvsrwz r10, v3 +; CHECK-P8-NEXT: or r4, r5, r6 +; CHECK-P8-NEXT: or r5, r7, r11 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: or r6, r8, r10 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; 
CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: lxv vs7, 48(r3) +; CHECK-P9-NEXT: lxv vs6, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 64(r3) +; CHECK-P9-NEXT: lxv vs1, 80(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: lxv vs2, 80(r3) -; CHECK-P9-NEXT: lxv vs3, 64(r3) -; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: lxv vs2, 96(r3) +; CHECK-P9-NEXT: lxv vs3, 112(r3) +; CHECK-P9-NEXT: lxv vs4, 0(r3) +; CHECK-P9-NEXT: lxv vs5, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: slwi r4, r4, 24 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi 
r5, r5, 16 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: or r4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: or r3, r3, r5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r5, r5, 24 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: slwi r6, r6, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 
-; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: slwi r6, r6, 8 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs7, 64(r3) +; CHECK-BE-NEXT: lxv vs6, 80(r3) +; CHECK-BE-NEXT: lxv vs0, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 32(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: lxv vs2, 16(r3) +; CHECK-BE-NEXT: lxv vs3, 0(r3) +; CHECK-BE-NEXT: lxv vs4, 112(r3) +; CHECK-BE-NEXT: lxv vs5, 96(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 -; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f6 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 ; CHECK-BE-NEXT: 
xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: slwi r4, r4, 24 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: or r4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: or r3, r3, r5 ; 
CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r5, r5, 24 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: slwi r6, r6, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: slwi r6, r6, 8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 Index: llvm/test/CodeGen/PowerPC/vec_int_ext.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_int_ext.ll +++ llvm/test/CodeGen/PowerPC/vec_int_ext.ll @@ -288,91 +288,80 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) { ; CHECK-LE-LABEL: testInvalidExtend: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: li 3, 0 ; CHECK-LE-NEXT: li 4, 2 -; CHECK-LE-NEXT: li 5, 4 ; CHECK-LE-NEXT: li 6, 6 -; CHECK-LE-NEXT: vextubrx 3, 3, 2 +; CHECK-LE-NEXT: li 3, 0 +; CHECK-LE-NEXT: li 5, 4 ; CHECK-LE-NEXT: vextubrx 4, 4, 2 -; CHECK-LE-NEXT: vextubrx 5, 5, 2 ; CHECK-LE-NEXT: vextubrx 6, 6, 2 -; CHECK-LE-NEXT: li 7, 8 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 +; CHECK-LE-NEXT: vextubrx 5, 5, 2 ; CHECK-LE-NEXT: li 8, 10 -; CHECK-LE-NEXT: li 9, 12 ; CHECK-LE-NEXT: li 10, 14 -; CHECK-LE-NEXT: extsb 3, 3 +; 
CHECK-LE-NEXT: li 7, 8 +; CHECK-LE-NEXT: li 9, 12 ; CHECK-LE-NEXT: extsb 4, 4 -; CHECK-LE-NEXT: extsb 5, 5 ; CHECK-LE-NEXT: extsb 6, 6 -; CHECK-LE-NEXT: vextubrx 7, 7, 2 ; CHECK-LE-NEXT: vextubrx 8, 8, 2 -; CHECK-LE-NEXT: extsb 7, 7 +; CHECK-LE-NEXT: vextubrx 10, 10, 2 +; CHECK-LE-NEXT: extsb 3, 3 +; CHECK-LE-NEXT: extsb 5, 5 ; CHECK-LE-NEXT: extsb 8, 8 -; CHECK-LE-NEXT: mtvsrd 35, 4 +; CHECK-LE-NEXT: extsb 10, 10 +; CHECK-LE-NEXT: slwi 6, 6, 16 +; CHECK-LE-NEXT: slwi 4, 4, 16 +; CHECK-LE-NEXT: vextubrx 7, 7, 2 ; CHECK-LE-NEXT: vextubrx 9, 9, 2 -; CHECK-LE-NEXT: vextubrx 10, 10, 2 -; CHECK-LE-NEXT: mtvsrd 34, 3 -; CHECK-LE-NEXT: mtvsrd 36, 6 +; CHECK-LE-NEXT: extsb 7, 7 ; CHECK-LE-NEXT: extsb 9, 9 -; CHECK-LE-NEXT: extsb 10, 10 -; CHECK-LE-NEXT: mtvsrd 37, 10 -; CHECK-LE-NEXT: vmrghh 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 5 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: mtvsrd 36, 8 -; CHECK-LE-NEXT: vmrglw 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 7 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: mtvsrd 36, 9 -; CHECK-LE-NEXT: vmrghh 4, 5, 4 -; CHECK-LE-NEXT: vmrglw 3, 4, 3 -; CHECK-LE-NEXT: xxmrgld 34, 35, 34 +; CHECK-LE-NEXT: or 5, 6, 5 +; CHECK-LE-NEXT: or 3, 4, 3 +; CHECK-LE-NEXT: slwi 4, 10, 16 +; CHECK-LE-NEXT: rldimi 3, 5, 32, 0 +; CHECK-LE-NEXT: slwi 5, 8, 16 +; CHECK-LE-NEXT: or 4, 4, 9 +; CHECK-LE-NEXT: or 5, 5, 7 +; CHECK-LE-NEXT: rldimi 5, 4, 32, 0 +; CHECK-LE-NEXT: mtvsrdd 34, 5, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: testInvalidExtend: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: li 9, 12 -; CHECK-BE-NEXT: li 10, 14 -; CHECK-BE-NEXT: li 7, 8 -; CHECK-BE-NEXT: li 8, 10 -; CHECK-BE-NEXT: vextublx 9, 9, 2 -; CHECK-BE-NEXT: vextublx 10, 10, 2 -; CHECK-BE-NEXT: vextublx 7, 7, 2 -; CHECK-BE-NEXT: vextublx 8, 8, 2 ; CHECK-BE-NEXT: li 3, 0 +; CHECK-BE-NEXT: li 7, 8 ; CHECK-BE-NEXT: li 4, 2 ; CHECK-BE-NEXT: li 5, 4 -; CHECK-BE-NEXT: li 6, 6 -; CHECK-BE-NEXT: extsb 9, 9 -; CHECK-BE-NEXT: extsb 10, 10 ; CHECK-BE-NEXT: vextublx 3, 3, 2 +; CHECK-BE-NEXT: 
vextublx 7, 7, 2 +; CHECK-BE-NEXT: li 8, 10 +; CHECK-BE-NEXT: li 9, 12 ; CHECK-BE-NEXT: vextublx 4, 4, 2 +; CHECK-BE-NEXT: li 6, 6 +; CHECK-BE-NEXT: li 10, 14 +; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: vextublx 5, 5, 2 +; CHECK-BE-NEXT: vextublx 8, 8, 2 +; CHECK-BE-NEXT: vextublx 9, 9, 2 ; CHECK-BE-NEXT: extsb 7, 7 -; CHECK-BE-NEXT: extsb 8, 8 -; CHECK-BE-NEXT: extsb 5, 5 -; CHECK-BE-NEXT: extsb 3, 3 -; CHECK-BE-NEXT: extsb 4, 4 -; CHECK-BE-NEXT: mtvsrwz 35, 9 -; CHECK-BE-NEXT: addis 9, 2, .LCPI11_0@toc@ha ; CHECK-BE-NEXT: vextublx 6, 6, 2 -; CHECK-BE-NEXT: mtvsrwz 34, 10 -; CHECK-BE-NEXT: mtvsrwz 37, 7 +; CHECK-BE-NEXT: extsb 4, 4 +; CHECK-BE-NEXT: extsb 5, 5 +; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: extsb 9, 9 ; CHECK-BE-NEXT: extsb 6, 6 -; CHECK-BE-NEXT: mtvsrwz 32, 3 -; CHECK-BE-NEXT: addi 9, 9, .LCPI11_0@toc@l -; CHECK-BE-NEXT: lxvx 36, 0, 9 -; CHECK-BE-NEXT: vperm 2, 3, 2, 4 -; CHECK-BE-NEXT: mtvsrwz 35, 8 -; CHECK-BE-NEXT: vperm 3, 5, 3, 4 -; CHECK-BE-NEXT: mtvsrwz 37, 5 -; CHECK-BE-NEXT: vmrghw 2, 3, 2 -; CHECK-BE-NEXT: mtvsrwz 35, 6 -; CHECK-BE-NEXT: vperm 3, 5, 3, 4 -; CHECK-BE-NEXT: mtvsrwz 37, 4 -; CHECK-BE-NEXT: vperm 4, 0, 5, 4 -; CHECK-BE-NEXT: vmrghw 3, 4, 3 -; CHECK-BE-NEXT: xxmrghd 34, 35, 34 +; CHECK-BE-NEXT: slwi 7, 7, 16 +; CHECK-BE-NEXT: vextublx 10, 10, 2 +; CHECK-BE-NEXT: slwi 3, 3, 16 +; CHECK-BE-NEXT: extsb 10, 10 +; CHECK-BE-NEXT: or 7, 7, 8 +; CHECK-BE-NEXT: slwi 8, 9, 16 +; CHECK-BE-NEXT: or 3, 3, 4 +; CHECK-BE-NEXT: slwi 4, 5, 16 +; CHECK-BE-NEXT: or 8, 8, 10 +; CHECK-BE-NEXT: or 4, 4, 6 +; CHECK-BE-NEXT: rldimi 8, 7, 32, 0 +; CHECK-BE-NEXT: rldimi 4, 3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd 34, 4, 8 ; CHECK-BE-NEXT: blr entry: