diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9187,6 +9187,54 @@
   return (!LosesInfo && !APFloatToConvert.isDenormal());
 }
 
+// Try to build a v16i8/v8i16 vector by packing its scalar operands into
+// 32-bit words with shift/or, so rldimi/rlwimi can be used to construct it:
+//   i32 = (i8 << 24) | (i8 << 16) | (i8 << 8) | i8
+//   i32 = (i16 << 16) | i16
+// The four words are emitted as a v4i32 BUILD_VECTOR and bitcast back to the
+// requested type; the v4i32 lowering is expected to do the rest:
+//   i64 = (i32 << 32) | i32
+// and put two i64 together to get a vector.
+// Returns an empty SDValue for every other vector type.
+static SDValue tryMaskInsertVector(SDValue Op, SelectionDAG &DAG,
+                                   bool LittleEndian) {
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+
+  // There are already patterns for v4i32 and v2i64 construction.
+  if (VT != MVT::v16i8 && VT != MVT::v8i16)
+    return SDValue();
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned ScalarSize = VT.getScalarSizeInBits();
+  unsigned EltsFor32 = VT.getVectorNumElements() / 4;
+  SDValue NewVecElts[4];
+  for (unsigned i = 0; i < 4; ++i) {
+    SDValue Word;
+    for (unsigned j = 0; j < EltsFor32; ++j) {
+      // j walks the word from its most to its least significant element; on
+      // little-endian that is the highest-numbered operand of the group.
+      unsigned Idx = i * EltsFor32 + (LittleEndian ? EltsFor32 - j - 1 : j);
+
+      // BUILD_VECTOR operands may be wider than the element type (they are
+      // implicitly truncated), so clear everything above the element width
+      // before the value is OR'ed next to its neighbours.
+      SDValue Part = DAG.getZeroExtendInReg(
+          DAG.getAnyExtOrTrunc(Op.getOperand(Idx), dl, MVT::i32), dl, EltVT);
+
+      // Left-shift elements to insert, except the last, because offset is 0.
+      if (unsigned Shift = ScalarSize * (EltsFor32 - j - 1))
+        Part = DAG.getNode(ISD::SHL, dl, MVT::i32, Part,
+                           DAG.getConstant(Shift, dl, MVT::i32));
+      Word = j == 0 ? Part : DAG.getNode(ISD::OR, dl, MVT::i32, Word, Part);
+    }
+    NewVecElts[i] = Word;
+  }
+
+  // Count on v4i32 to get optimized BUILD_VECTOR pattern.
+  return DAG.getBitcast(VT, DAG.getBuildVector(MVT::v4i32, dl, NewVecElts));
+}
+
 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                              unsigned &Opcode) {
   LoadSDNode *InputNode = dyn_cast(Op.getOperand(0));
@@ -9368,6 +9416,13 @@
         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                         Subtarget.hasP8Vector()))
       return Op;
+
+    // Try to construct vector using masked insert.
+    if (!BVN->isConstant() && !DAG.isSplatValue(Op, true))
+      if (SDValue Res =
+              tryMaskInsertVector(Op, DAG, Subtarget.isLittleEndian()))
+        return Res;
+
     return SDValue();
   }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -282,55 +282,65 @@
 ; CHECK-PWR9-LE-LABEL: sub_absv_8_ext:
 ; CHECK-PWR9-LE: # %bb.0: # %entry
 ; CHECK-PWR9-LE-NEXT: li r3, 0
-; CHECK-PWR9-LE-NEXT: li r5, 2
+; CHECK-PWR9-LE-NEXT: li r7, 4
 ; CHECK-PWR9-LE-NEXT: li r4, 1
+; CHECK-PWR9-LE-NEXT: li r5, 2
 ; CHECK-PWR9-LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: vextubrx r6, r3, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r3, r3, v3
-; CHECK-PWR9-LE-NEXT: vextubrx r8, r5, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3
+; CHECK-PWR9-LE-NEXT: li r6, 3
 ; CHECK-PWR9-LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-PWR9-LE-NEXT: vextubrx r8, r3, v2
+; CHECK-PWR9-LE-NEXT: vextubrx r3, r3, v3
+; CHECK-PWR9-LE-NEXT: vextubrx r9, r4, v2
 ; CHECK-PWR9-LE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r3, r3, 24
-; CHECK-PWR9-LE-NEXT: vextubrx r7, r4, v2
 ; CHECK-PWR9-LE-NEXT: vextubrx r4, r4, v3
 ; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-LE-NEXT: sub
r3, r6, r3 -; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24 -; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24 -; CHECK-PWR9-LE-NEXT: clrlwi r4, r4, 24 -; CHECK-PWR9-LE-NEXT: sub r5, r8, r5 -; CHECK-PWR9-LE-NEXT: sub r4, r7, r4 -; CHECK-PWR9-LE-NEXT: srawi r6, r3, 31 -; CHECK-PWR9-LE-NEXT: srawi r7, r4, 31 -; CHECK-PWR9-LE-NEXT: xor r3, r3, r6 -; CHECK-PWR9-LE-NEXT: xor r4, r4, r7 -; CHECK-PWR9-LE-NEXT: sub r6, r3, r6 -; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31 -; CHECK-PWR9-LE-NEXT: sub r4, r4, r7 -; CHECK-PWR9-LE-NEXT: xor r5, r5, r3 -; CHECK-PWR9-LE-NEXT: sub r3, r5, r3 -; CHECK-PWR9-LE-NEXT: li r5, 3 -; CHECK-PWR9-LE-NEXT: vextubrx r7, r5, v2 +; CHECK-PWR9-LE-NEXT: clrlwi r3, r3, 24 +; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24 +; CHECK-PWR9-LE-NEXT: vextubrx r10, r5, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3 -; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24 +; CHECK-PWR9-LE-NEXT: clrlwi r4, r4, 24 +; CHECK-PWR9-LE-NEXT: vextubrx r11, r6, v2 +; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 +; CHECK-PWR9-LE-NEXT: vextubrx r6, r6, v3 ; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24 -; CHECK-PWR9-LE-NEXT: sub r5, r7, r5 -; CHECK-PWR9-LE-NEXT: srawi r7, r5, 31 -; CHECK-PWR9-LE-NEXT: xor r5, r5, r7 -; CHECK-PWR9-LE-NEXT: sub r5, r5, r7 -; CHECK-PWR9-LE-NEXT: li r7, 4 +; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24 +; CHECK-PWR9-LE-NEXT: sub r3, r8, r3 +; CHECK-PWR9-LE-NEXT: sub r4, r9, r4 +; CHECK-PWR9-LE-NEXT: sub r5, r10, r5 +; CHECK-PWR9-LE-NEXT: clrlwi r6, r6, 24 +; CHECK-PWR9-LE-NEXT: sub r6, r11, r6 +; CHECK-PWR9-LE-NEXT: srawi r8, r3, 31 +; CHECK-PWR9-LE-NEXT: xor r3, r3, r8 +; CHECK-PWR9-LE-NEXT: srawi r9, r4, 31 +; CHECK-PWR9-LE-NEXT: xor r4, r4, r9 +; CHECK-PWR9-LE-NEXT: srawi r10, r5, 31 +; CHECK-PWR9-LE-NEXT: srawi r11, r6, 31 +; CHECK-PWR9-LE-NEXT: xor r5, r5, r10 +; CHECK-PWR9-LE-NEXT: xor r12, r6, r11 +; CHECK-PWR9-LE-NEXT: sub r3, r3, r8 ; CHECK-PWR9-LE-NEXT: vextubrx r8, r7, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r7, r7, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r5 +; CHECK-PWR9-LE-NEXT: sub r4, r4, r9 +; 
CHECK-PWR9-LE-NEXT: sub r6, r5, r10 +; CHECK-PWR9-LE-NEXT: sub r5, r12, r11 +; CHECK-PWR9-LE-NEXT: li r11, 7 +; CHECK-PWR9-LE-NEXT: slwi r6, r6, 16 +; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2 +; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3 +; CHECK-PWR9-LE-NEXT: slwi r5, r5, 24 +; CHECK-PWR9-LE-NEXT: slwi r4, r4, 8 ; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24 ; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24 +; CHECK-PWR9-LE-NEXT: or r5, r5, r6 ; CHECK-PWR9-LE-NEXT: sub r7, r8, r7 +; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24 +; CHECK-PWR9-LE-NEXT: clrlwi r0, r11, 24 +; CHECK-PWR9-LE-NEXT: or r4, r5, r4 ; CHECK-PWR9-LE-NEXT: srawi r8, r7, 31 +; CHECK-PWR9-LE-NEXT: or r3, r4, r3 ; CHECK-PWR9-LE-NEXT: xor r7, r7, r8 ; CHECK-PWR9-LE-NEXT: sub r7, r7, r8 ; CHECK-PWR9-LE-NEXT: li r8, 5 @@ -345,40 +355,41 @@ ; CHECK-PWR9-LE-NEXT: li r9, 6 ; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3 +; CHECK-PWR9-LE-NEXT: slwi r8, r8, 8 +; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 +; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24 +; CHECK-PWR9-LE-NEXT: sub r9, r10, r9 +; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31 +; CHECK-PWR9-LE-NEXT: xor r9, r9, r10 +; CHECK-PWR9-LE-NEXT: sub r11, r9, r10 +; CHECK-PWR9-LE-NEXT: sub r9, r12, r0 +; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31 +; CHECK-PWR9-LE-NEXT: slwi r11, r11, 16 +; CHECK-PWR9-LE-NEXT: xor r9, r9, r10 +; CHECK-PWR9-LE-NEXT: sub r12, r9, r10 +; CHECK-PWR9-LE-NEXT: li r9, 8 +; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2 +; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3 +; CHECK-PWR9-LE-NEXT: slwi r12, r12, 24 +; CHECK-PWR9-LE-NEXT: or r11, r12, r11 ; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 ; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24 +; CHECK-PWR9-LE-NEXT: or r8, r11, r8 ; CHECK-PWR9-LE-NEXT: sub r9, r10, r9 +; CHECK-PWR9-LE-NEXT: or r7, r8, r7 ; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31 +; CHECK-PWR9-LE-NEXT: rldimi r3, r7, 32, 0 ; CHECK-PWR9-LE-NEXT: xor r9, r9, r10 ; CHECK-PWR9-LE-NEXT: sub r9, r9, r10 -; CHECK-PWR9-LE-NEXT: li r10, 7 -; 
CHECK-PWR9-LE-NEXT: vextubrx r11, r10, v2 +; CHECK-PWR9-LE-NEXT: li r10, 9 +; CHECK-PWR9-LE-NEXT: vextubrx r0, r10, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r10, r10, v3 -; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24 -; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 -; CHECK-PWR9-LE-NEXT: sub r10, r11, r10 -; CHECK-PWR9-LE-NEXT: srawi r11, r10, 31 -; CHECK-PWR9-LE-NEXT: xor r10, r10, r11 -; CHECK-PWR9-LE-NEXT: sub r10, r10, r11 -; CHECK-PWR9-LE-NEXT: li r11, 8 -; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2 -; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v5, r10 -; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24 -; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24 -; CHECK-PWR9-LE-NEXT: sub r11, r12, r11 -; CHECK-PWR9-LE-NEXT: srawi r12, r11, 31 -; CHECK-PWR9-LE-NEXT: xor r11, r11, r12 -; CHECK-PWR9-LE-NEXT: sub r11, r11, r12 -; CHECK-PWR9-LE-NEXT: li r12, 9 -; CHECK-PWR9-LE-NEXT: vextubrx r0, r12, v2 -; CHECK-PWR9-LE-NEXT: vextubrx r12, r12, v3 ; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24 -; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24 -; CHECK-PWR9-LE-NEXT: sub r12, r0, r12 -; CHECK-PWR9-LE-NEXT: srawi r0, r12, 31 -; CHECK-PWR9-LE-NEXT: xor r12, r12, r0 -; CHECK-PWR9-LE-NEXT: sub r12, r12, r0 +; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 +; CHECK-PWR9-LE-NEXT: sub r10, r0, r10 +; CHECK-PWR9-LE-NEXT: srawi r0, r10, 31 +; CHECK-PWR9-LE-NEXT: xor r10, r10, r0 +; CHECK-PWR9-LE-NEXT: sub r10, r10, r0 ; CHECK-PWR9-LE-NEXT: li r0, 10 ; CHECK-PWR9-LE-NEXT: vextubrx r30, r0, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r0, r0, v3 @@ -400,6 +411,8 @@ ; CHECK-PWR9-LE-NEXT: li r29, 12 ; CHECK-PWR9-LE-NEXT: vextubrx r28, r29, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r29, r29, v3 +; CHECK-PWR9-LE-NEXT: slwi r6, r30, 24 +; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24 ; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24 ; CHECK-PWR9-LE-NEXT: sub r29, r28, r29 @@ -427,47 +440,30 @@ ; CHECK-PWR9-LE-NEXT: li r26, 15 ; CHECK-PWR9-LE-NEXT: vextubrx r25, r26, v2 ; 
CHECK-PWR9-LE-NEXT: vextubrx r26, r26, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v2, r6 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r4 -; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r3 +; CHECK-PWR9-LE-NEXT: slwi r4, r27, 16 +; CHECK-PWR9-LE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-PWR9-LE-NEXT: clrlwi r25, r25, 24 ; CHECK-PWR9-LE-NEXT: clrlwi r26, r26, 24 -; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r8 ; CHECK-PWR9-LE-NEXT: sub r26, r25, r26 -; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r7 ; CHECK-PWR9-LE-NEXT: srawi r25, r26, 31 -; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r9 ; CHECK-PWR9-LE-NEXT: xor r26, r26, r25 -; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4 ; CHECK-PWR9-LE-NEXT: sub r26, r26, r25 ; CHECK-PWR9-LE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: mtvsrd v5, r26 +; CHECK-PWR9-LE-NEXT: slwi r5, r26, 24 ; CHECK-PWR9-LE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r30 -; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: xxmrglw vs0, v3, v2 -; CHECK-PWR9-LE-NEXT: mtvsrd v2, r11 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r12 -; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r0 -; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r28 +; CHECK-PWR9-LE-NEXT: or r4, r5, r4 +; CHECK-PWR9-LE-NEXT: slwi r5, r28, 8 ; CHECK-PWR9-LE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2 -; CHECK-PWR9-LE-NEXT: mtvsrd v3, r29 +; CHECK-PWR9-LE-NEXT: or r4, r4, r5 +; CHECK-PWR9-LE-NEXT: slwi r5, r0, 16 +; CHECK-PWR9-LE-NEXT: or r5, r6, r5 +; CHECK-PWR9-LE-NEXT: slwi r6, r10, 8 +; CHECK-PWR9-LE-NEXT: or r4, r4, r29 ; CHECK-PWR9-LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3 -; CHECK-PWR9-LE-NEXT: mtvsrd v4, r27 -; 
CHECK-PWR9-LE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4 -; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3 -; CHECK-PWR9-LE-NEXT: xxmrglw vs1, v3, v2 -; CHECK-PWR9-LE-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-PWR9-LE-NEXT: or r5, r5, r6 +; CHECK-PWR9-LE-NEXT: or r5, r5, r9 +; CHECK-PWR9-LE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-PWR9-LE-NEXT: mtvsrdd v2, r5, r3 ; CHECK-PWR9-LE-NEXT: blr ; ; CHECK-PWR9-BE-LABEL: sub_absv_8_ext: @@ -475,57 +471,69 @@ ; CHECK-PWR9-BE-NEXT: li r3, 0 ; CHECK-PWR9-BE-NEXT: li r4, 1 ; CHECK-PWR9-BE-NEXT: li r5, 2 +; CHECK-PWR9-BE-NEXT: li r6, 3 ; CHECK-PWR9-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-PWR9-BE-NEXT: vextublx r6, r3, v2 -; CHECK-PWR9-BE-NEXT: vextublx r3, r3, v3 -; CHECK-PWR9-BE-NEXT: vextublx r7, r4, v2 -; CHECK-PWR9-BE-NEXT: vextublx r4, r4, v3 +; CHECK-PWR9-BE-NEXT: li r8, 4 ; CHECK-PWR9-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-PWR9-BE-NEXT: vextublx r7, r3, v2 +; CHECK-PWR9-BE-NEXT: vextublx r3, r3, v3 +; CHECK-PWR9-BE-NEXT: vextublx r9, r4, v2 ; CHECK-PWR9-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-PWR9-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-PWR9-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-PWR9-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24 -; CHECK-PWR9-BE-NEXT: clrlwi r3, r3, 24 +; CHECK-PWR9-BE-NEXT: vextublx r4, r4, v3 ; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24 -; CHECK-PWR9-BE-NEXT: clrlwi r4, r4, 24 -; CHECK-PWR9-BE-NEXT: vextublx r8, r5, v2 +; CHECK-PWR9-BE-NEXT: vextublx r10, r5, v2 ; CHECK-PWR9-BE-NEXT: vextublx r5, r5, v3 -; CHECK-PWR9-BE-NEXT: sub r3, r6, r3 -; CHECK-PWR9-BE-NEXT: sub r4, r7, r4 -; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24 +; CHECK-PWR9-BE-NEXT: clrlwi r3, r3, 24 +; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24 +; CHECK-PWR9-BE-NEXT: clrlwi r4, r4, 24 +; CHECK-PWR9-BE-NEXT: vextublx r11, r6, v2 +; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24 +; CHECK-PWR9-BE-NEXT: vextublx r6, 
r6, v3 +; CHECK-PWR9-BE-NEXT: clrlwi r5, r5, 24 +; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24 +; CHECK-PWR9-BE-NEXT: sub r3, r7, r3 +; CHECK-PWR9-BE-NEXT: sub r4, r9, r4 +; CHECK-PWR9-BE-NEXT: sub r5, r10, r5 +; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24 +; CHECK-PWR9-BE-NEXT: sub r6, r11, r6 +; CHECK-PWR9-BE-NEXT: srawi r7, r3, 31 +; CHECK-PWR9-BE-NEXT: xor r3, r3, r7 +; CHECK-PWR9-BE-NEXT: srawi r9, r4, 31 +; CHECK-PWR9-BE-NEXT: srawi r10, r5, 31 +; CHECK-PWR9-BE-NEXT: srawi r11, r6, 31 +; CHECK-PWR9-BE-NEXT: xor r4, r4, r9 +; CHECK-PWR9-BE-NEXT: xor r5, r5, r10 +; CHECK-PWR9-BE-NEXT: xor r12, r6, r11 +; CHECK-PWR9-BE-NEXT: sub r6, r3, r7 +; CHECK-PWR9-BE-NEXT: sub r7, r4, r9 +; CHECK-PWR9-BE-NEXT: sub r4, r5, r10 +; CHECK-PWR9-BE-NEXT: vextublx r5, r8, v2 +; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3 +; CHECK-PWR9-BE-NEXT: sub r3, r12, r11 +; CHECK-PWR9-BE-NEXT: li r11, 7 +; CHECK-PWR9-BE-NEXT: slwi r7, r7, 16 +; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2 +; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3 +; CHECK-PWR9-BE-NEXT: slwi r6, r6, 24 +; CHECK-PWR9-BE-NEXT: slwi r4, r4, 8 ; CHECK-PWR9-BE-NEXT: clrlwi r5, r5, 24 -; CHECK-PWR9-BE-NEXT: sub r5, r8, r5 -; CHECK-PWR9-BE-NEXT: srawi r6, r3, 31 -; CHECK-PWR9-BE-NEXT: srawi r7, r4, 31 +; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24 +; CHECK-PWR9-BE-NEXT: or r6, r6, r7 +; CHECK-PWR9-BE-NEXT: sub r5, r5, r8 +; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24 +; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24 +; CHECK-PWR9-BE-NEXT: or r4, r6, r4 ; CHECK-PWR9-BE-NEXT: srawi r8, r5, 31 -; CHECK-PWR9-BE-NEXT: xor r3, r3, r6 -; CHECK-PWR9-BE-NEXT: xor r4, r4, r7 +; CHECK-PWR9-BE-NEXT: or r3, r4, r3 ; CHECK-PWR9-BE-NEXT: xor r5, r5, r8 -; CHECK-PWR9-BE-NEXT: sub r3, r3, r6 -; CHECK-PWR9-BE-NEXT: li r6, 3 -; CHECK-PWR9-BE-NEXT: sub r4, r4, r7 ; CHECK-PWR9-BE-NEXT: sub r5, r5, r8 -; CHECK-PWR9-BE-NEXT: vextublx r7, r6, v2 -; CHECK-PWR9-BE-NEXT: vextublx r6, r6, v3 -; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24 -; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24 -; 
CHECK-PWR9-BE-NEXT: sub r6, r7, r6 -; CHECK-PWR9-BE-NEXT: srawi r7, r6, 31 -; CHECK-PWR9-BE-NEXT: xor r6, r6, r7 -; CHECK-PWR9-BE-NEXT: sub r6, r6, r7 -; CHECK-PWR9-BE-NEXT: li r7, 4 -; CHECK-PWR9-BE-NEXT: vextublx r8, r7, v2 -; CHECK-PWR9-BE-NEXT: vextublx r7, r7, v3 -; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24 -; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24 -; CHECK-PWR9-BE-NEXT: sub r7, r8, r7 -; CHECK-PWR9-BE-NEXT: srawi r8, r7, 31 -; CHECK-PWR9-BE-NEXT: xor r7, r7, r8 -; CHECK-PWR9-BE-NEXT: sub r7, r7, r8 ; CHECK-PWR9-BE-NEXT: li r8, 5 ; CHECK-PWR9-BE-NEXT: vextublx r9, r8, v2 ; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3 +; CHECK-PWR9-BE-NEXT: slwi r5, r5, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24 ; CHECK-PWR9-BE-NEXT: sub r8, r9, r8 @@ -535,25 +543,25 @@ ; CHECK-PWR9-BE-NEXT: li r9, 6 ; CHECK-PWR9-BE-NEXT: vextublx r10, r9, v2 ; CHECK-PWR9-BE-NEXT: vextublx r9, r9, v3 +; CHECK-PWR9-BE-NEXT: slwi r4, r8, 16 +; CHECK-PWR9-BE-NEXT: or r4, r5, r4 ; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24 ; CHECK-PWR9-BE-NEXT: sub r9, r10, r9 ; CHECK-PWR9-BE-NEXT: srawi r10, r9, 31 ; CHECK-PWR9-BE-NEXT: xor r9, r9, r10 ; CHECK-PWR9-BE-NEXT: sub r9, r9, r10 -; CHECK-PWR9-BE-NEXT: li r10, 7 -; CHECK-PWR9-BE-NEXT: vextublx r11, r10, v2 -; CHECK-PWR9-BE-NEXT: vextublx r10, r10, v3 -; CHECK-PWR9-BE-NEXT: mtfprwz f2, r9 -; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24 -; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24 -; CHECK-PWR9-BE-NEXT: sub r10, r11, r10 +; CHECK-PWR9-BE-NEXT: sub r10, r12, r11 ; CHECK-PWR9-BE-NEXT: srawi r11, r10, 31 +; CHECK-PWR9-BE-NEXT: slwi r5, r9, 8 ; CHECK-PWR9-BE-NEXT: xor r10, r10, r11 +; CHECK-PWR9-BE-NEXT: or r4, r4, r5 ; CHECK-PWR9-BE-NEXT: sub r10, r10, r11 ; CHECK-PWR9-BE-NEXT: li r11, 8 ; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2 ; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3 +; CHECK-PWR9-BE-NEXT: or r4, r4, r10 +; CHECK-PWR9-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24 ; 
CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24 ; CHECK-PWR9-BE-NEXT: sub r11, r12, r11 @@ -563,6 +571,7 @@ ; CHECK-PWR9-BE-NEXT: li r12, 9 ; CHECK-PWR9-BE-NEXT: vextublx r0, r12, v2 ; CHECK-PWR9-BE-NEXT: vextublx r12, r12, v3 +; CHECK-PWR9-BE-NEXT: slwi r11, r11, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24 ; CHECK-PWR9-BE-NEXT: sub r12, r0, r12 @@ -572,7 +581,8 @@ ; CHECK-PWR9-BE-NEXT: li r0, 10 ; CHECK-PWR9-BE-NEXT: vextublx r30, r0, v2 ; CHECK-PWR9-BE-NEXT: vextublx r0, r0, v3 -; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r12 +; CHECK-PWR9-BE-NEXT: slwi r12, r12, 16 +; CHECK-PWR9-BE-NEXT: or r11, r11, r12 ; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24 ; CHECK-PWR9-BE-NEXT: sub r0, r30, r0 @@ -582,6 +592,8 @@ ; CHECK-PWR9-BE-NEXT: li r30, 11 ; CHECK-PWR9-BE-NEXT: vextublx r29, r30, v2 ; CHECK-PWR9-BE-NEXT: vextublx r30, r30, v3 +; CHECK-PWR9-BE-NEXT: slwi r12, r0, 8 +; CHECK-PWR9-BE-NEXT: or r11, r11, r12 ; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24 ; CHECK-PWR9-BE-NEXT: sub r30, r29, r30 @@ -591,6 +603,8 @@ ; CHECK-PWR9-BE-NEXT: li r29, 12 ; CHECK-PWR9-BE-NEXT: vextublx r28, r29, v2 ; CHECK-PWR9-BE-NEXT: vextublx r29, r29, v3 +; CHECK-PWR9-BE-NEXT: or r11, r11, r30 +; CHECK-PWR9-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24 ; CHECK-PWR9-BE-NEXT: sub r29, r28, r29 @@ -600,6 +614,8 @@ ; CHECK-PWR9-BE-NEXT: li r28, 13 ; CHECK-PWR9-BE-NEXT: vextublx r27, r28, v2 ; CHECK-PWR9-BE-NEXT: vextublx r28, r28, v3 +; CHECK-PWR9-BE-NEXT: slwi r0, r29, 24 +; CHECK-PWR9-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24 ; CHECK-PWR9-BE-NEXT: sub r28, r27, r28 @@ -609,6 +625,9 @@ ; CHECK-PWR9-BE-NEXT: li r27, 14 ; CHECK-PWR9-BE-NEXT: vextublx r26, r27, v2 ; CHECK-PWR9-BE-NEXT: vextublx r27, r27, v3 +; CHECK-PWR9-BE-NEXT: 
slwi r12, r28, 16 +; CHECK-PWR9-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-PWR9-BE-NEXT: or r12, r0, r12 ; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24 ; CHECK-PWR9-BE-NEXT: sub r27, r26, r27 @@ -618,332 +637,300 @@ ; CHECK-PWR9-BE-NEXT: li r26, 15 ; CHECK-PWR9-BE-NEXT: vextublx r25, r26, v2 ; CHECK-PWR9-BE-NEXT: vextublx r26, r26, v3 -; CHECK-PWR9-BE-NEXT: mtfprwz f0, r27 -; CHECK-PWR9-BE-NEXT: addis r27, r2, .LCPI9_0@toc@ha -; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r28 -; CHECK-PWR9-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-PWR9-BE-NEXT: addi r27, r27, .LCPI9_0@toc@l +; CHECK-PWR9-BE-NEXT: slwi r0, r27, 8 +; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-PWR9-BE-NEXT: or r12, r12, r0 ; CHECK-PWR9-BE-NEXT: clrlwi r25, r25, 24 ; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24 -; CHECK-PWR9-BE-NEXT: lxv vs1, 0(r27) -; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-PWR9-BE-NEXT: sub r26, r25, r26 ; CHECK-PWR9-BE-NEXT: srawi r25, r26, 31 ; CHECK-PWR9-BE-NEXT: xor r26, r26, r25 ; CHECK-PWR9-BE-NEXT: sub r26, r26, r25 ; CHECK-PWR9-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r26 +; CHECK-PWR9-BE-NEXT: or r12, r12, r26 ; CHECK-PWR9-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-PWR9-BE-NEXT: xxperm v2, vs0, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f0, r29 -; CHECK-PWR9-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f0, r0 -; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r30 -; CHECK-PWR9-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f0, r11 -; CHECK-PWR9-BE-NEXT: xxperm v4, vs0, vs1 -; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r4 -; CHECK-PWR9-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r10 -; CHECK-PWR9-BE-NEXT: 
mtvsrwz v3, r8 -; CHECK-PWR9-BE-NEXT: xxperm v2, vs2, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f2, r7 -; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f2, r5 -; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r6 -; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1 -; CHECK-PWR9-BE-NEXT: mtfprwz f2, r3 -; CHECK-PWR9-BE-NEXT: xxperm v4, vs2, vs1 -; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-PWR9-BE-NEXT: xxmrghw vs1, v3, v2 -; CHECK-PWR9-BE-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-PWR9-BE-NEXT: rldimi r12, r11, 32, 0 +; CHECK-PWR9-BE-NEXT: mtvsrdd v2, r4, r12 ; CHECK-PWR9-BE-NEXT: blr ; ; CHECK-PWR8-LABEL: sub_absv_8_ext: ; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: mfvsrd r6, v3 ; CHECK-PWR8-NEXT: xxswapd vs0, v2 -; CHECK-PWR8-NEXT: mfvsrd r5, v2 +; CHECK-PWR8-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: mfvsrd r6, v3 +; CHECK-PWR8-NEXT: std r24, -64(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r21, -88(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: mfvsrd r5, v2 ; CHECK-PWR8-NEXT: xxswapd vs1, v3 -; CHECK-PWR8-NEXT: clrldi r3, r5, 56 -; CHECK-PWR8-NEXT: rldicl r7, r5, 56, 56 +; CHECK-PWR8-NEXT: rldicl r7, r6, 32, 56 ; CHECK-PWR8-NEXT: clrldi r4, r6, 56 -; CHECK-PWR8-NEXT: rldicl r8, r6, 56, 56 +; CHECK-PWR8-NEXT: std r7, -160(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r7, r5, 24, 56 +; CHECK-PWR8-NEXT: clrldi r3, r5, 56 +; CHECK-PWR8-NEXT: clrlwi r4, r4, 24 ; CHECK-PWR8-NEXT: mffprd r26, f0 -; CHECK-PWR8-NEXT: clrlwi r3, r3, 24 -; CHECK-PWR8-NEXT: clrlwi r7, r7, 24 -; CHECK-PWR8-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std 
r27, -40(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r7, -168(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r7, r6, 24, 56 +; CHECK-PWR8-NEXT: rldicl r12, r5, 56, 56 +; CHECK-PWR8-NEXT: mffprd r24, f1 +; CHECK-PWR8-NEXT: clrlwi r29, r12, 24 ; CHECK-PWR8-NEXT: rldicl r11, r5, 40, 56 -; CHECK-PWR8-NEXT: rldicl r12, r6, 40, 56 -; CHECK-PWR8-NEXT: clrlwi r4, r4, 24 -; CHECK-PWR8-NEXT: clrlwi r8, r8, 24 +; CHECK-PWR8-NEXT: std r7, -176(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r7, r5, 16, 56 +; CHECK-PWR8-NEXT: clrlwi r0, r11, 24 +; CHECK-PWR8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r7, -184(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: clrlwi r7, r3, 24 +; CHECK-PWR8-NEXT: rldicl r3, r26, 24, 56 +; CHECK-PWR8-NEXT: sub r12, r7, r4 ; CHECK-PWR8-NEXT: rldicl r9, r5, 48, 56 +; CHECK-PWR8-NEXT: ld r4, -168(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: rldicl r10, r6, 48, 56 -; CHECK-PWR8-NEXT: sub r4, r3, r4 -; CHECK-PWR8-NEXT: clrlwi r11, r11, 24 -; CHECK-PWR8-NEXT: rldicl r3, r26, 16, 56 -; CHECK-PWR8-NEXT: clrlwi r12, r12, 24 -; CHECK-PWR8-NEXT: sub r7, r7, r8 -; CHECK-PWR8-NEXT: clrlwi r9, r9, 24 -; CHECK-PWR8-NEXT: clrlwi r10, r10, 24 -; CHECK-PWR8-NEXT: std r24, -64(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: mffprd r24, f1 -; CHECK-PWR8-NEXT: rldicl r0, r5, 32, 56 -; CHECK-PWR8-NEXT: rldicl r30, r6, 32, 56 -; CHECK-PWR8-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: sub r11, r11, r12 -; CHECK-PWR8-NEXT: sub r9, r9, r10 -; CHECK-PWR8-NEXT: srawi r3, r4, 31 -; CHECK-PWR8-NEXT: srawi r12, r7, 31 -; CHECK-PWR8-NEXT: clrlwi r10, r0, 24 -; CHECK-PWR8-NEXT: clrlwi r0, r30, 24 -; CHECK-PWR8-NEXT: xor r4, r4, r3 -; CHECK-PWR8-NEXT: xor r7, r7, r12 -; CHECK-PWR8-NEXT: sub r10, r10, r0 -; CHECK-PWR8-NEXT: std r20, -96(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: sub r3, r4, r3 -; 
CHECK-PWR8-NEXT: srawi r4, r9, 31 -; CHECK-PWR8-NEXT: sub r7, r7, r12 -; CHECK-PWR8-NEXT: std r22, -80(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: rldicl r29, r5, 24, 56 -; CHECK-PWR8-NEXT: rldicl r28, r6, 24, 56 -; CHECK-PWR8-NEXT: xor r9, r9, r4 -; CHECK-PWR8-NEXT: mtvsrd v3, r7 -; CHECK-PWR8-NEXT: rldicl r27, r5, 16, 56 +; CHECK-PWR8-NEXT: rldicl r27, r6, 40, 56 +; CHECK-PWR8-NEXT: std r3, -192(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: ld r3, -160(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: rldicl r21, r5, 32, 56 ; CHECK-PWR8-NEXT: rldicl r25, r6, 16, 56 -; CHECK-PWR8-NEXT: clrlwi r30, r29, 24 -; CHECK-PWR8-NEXT: clrlwi r29, r28, 24 -; CHECK-PWR8-NEXT: mtvsrd v2, r3 -; CHECK-PWR8-NEXT: sub r4, r9, r4 -; CHECK-PWR8-NEXT: srawi r7, r10, 31 -; CHECK-PWR8-NEXT: srawi r3, r11, 31 -; CHECK-PWR8-NEXT: clrlwi r9, r27, 24 -; CHECK-PWR8-NEXT: clrlwi r12, r25, 24 -; CHECK-PWR8-NEXT: sub r0, r30, r29 -; CHECK-PWR8-NEXT: mtvsrd v4, r4 -; CHECK-PWR8-NEXT: std r23, -72(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: xor r10, r10, r7 -; CHECK-PWR8-NEXT: xor r11, r11, r3 -; CHECK-PWR8-NEXT: sub r9, r9, r12 -; CHECK-PWR8-NEXT: std r18, -112(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r19, -104(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: vmrghb v2, v3, v2 -; CHECK-PWR8-NEXT: sub r7, r10, r7 -; CHECK-PWR8-NEXT: rldicl r5, r5, 8, 56 -; CHECK-PWR8-NEXT: sub r3, r11, r3 -; CHECK-PWR8-NEXT: rldicl r6, r6, 8, 56 -; CHECK-PWR8-NEXT: srawi r4, r0, 31 -; CHECK-PWR8-NEXT: mtvsrd v0, r7 +; CHECK-PWR8-NEXT: std r14, -144(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std r16, -128(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r17, -120(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: srawi r7, r9, 31 +; CHECK-PWR8-NEXT: std r18, -112(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: clrlwi r11, r4, 24 +; CHECK-PWR8-NEXT: rldicl r28, r5, 8, 56 +; CHECK-PWR8-NEXT: ld r4, -176(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: rldicl r30, r6, 8, 56 +; CHECK-PWR8-NEXT: rldicl r5, r24, 56, 
56 +; CHECK-PWR8-NEXT: std r22, -80(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r2, -152(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r8, r6, 56, 56 ; CHECK-PWR8-NEXT: clrldi r23, r26, 56 -; CHECK-PWR8-NEXT: mtvsrd v5, r3 -; CHECK-PWR8-NEXT: clrlwi r3, r5, 24 -; CHECK-PWR8-NEXT: clrlwi r5, r6, 24 +; CHECK-PWR8-NEXT: std r5, -200(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: clrlwi r7, r4, 24 ; CHECK-PWR8-NEXT: clrldi r22, r24, 56 -; CHECK-PWR8-NEXT: rldicl r21, r26, 56, 56 -; CHECK-PWR8-NEXT: xor r10, r0, r4 -; CHECK-PWR8-NEXT: xor r9, r9, r7 -; CHECK-PWR8-NEXT: rldicl r20, r24, 56, 56 -; CHECK-PWR8-NEXT: rldicl r19, r26, 48, 56 -; CHECK-PWR8-NEXT: sub r3, r3, r5 -; CHECK-PWR8-NEXT: sub r4, r10, r4 -; CHECK-PWR8-NEXT: sub r7, r9, r7 -; CHECK-PWR8-NEXT: clrlwi r9, r23, 24 ; CHECK-PWR8-NEXT: rldicl r18, r24, 48, 56 -; CHECK-PWR8-NEXT: clrlwi r10, r22, 24 -; CHECK-PWR8-NEXT: clrlwi r11, r21, 24 -; CHECK-PWR8-NEXT: clrlwi r12, r20, 24 -; CHECK-PWR8-NEXT: mtvsrd v1, r4 -; CHECK-PWR8-NEXT: std r14, -144(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r15, -136(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: rldicl r17, r26, 40, 56 ; CHECK-PWR8-NEXT: rldicl r16, r24, 40, 56 -; CHECK-PWR8-NEXT: sub r9, r9, r10 -; CHECK-PWR8-NEXT: sub r10, r11, r12 -; CHECK-PWR8-NEXT: mtvsrd v3, r7 -; CHECK-PWR8-NEXT: srawi r4, r3, 31 -; CHECK-PWR8-NEXT: clrlwi r11, r19, 24 -; CHECK-PWR8-NEXT: clrlwi r12, r18, 24 -; CHECK-PWR8-NEXT: vmrghb v4, v5, v4 -; CHECK-PWR8-NEXT: std r31, -8(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: xor r3, r3, r4 -; CHECK-PWR8-NEXT: sub r7, r11, r12 -; CHECK-PWR8-NEXT: clrlwi r11, r17, 24 -; CHECK-PWR8-NEXT: clrlwi r12, r16, 24 -; CHECK-PWR8-NEXT: vmrghb v0, v1, v0 -; CHECK-PWR8-NEXT: std r2, -152(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: rldicl r15, r26, 32, 56 +; CHECK-PWR8-NEXT: ld r4, -184(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: rldicl r14, r24, 32, 56 -; CHECK-PWR8-NEXT: sub 
r3, r3, r4 -; CHECK-PWR8-NEXT: sub r11, r11, r12 -; CHECK-PWR8-NEXT: srawi r4, r9, 31 -; CHECK-PWR8-NEXT: srawi r12, r10, 31 -; CHECK-PWR8-NEXT: clrlwi r0, r15, 24 -; CHECK-PWR8-NEXT: clrlwi r30, r14, 24 -; CHECK-PWR8-NEXT: mtvsrd v5, r3 -; CHECK-PWR8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: xor r9, r9, r4 -; CHECK-PWR8-NEXT: xor r10, r10, r12 -; CHECK-PWR8-NEXT: sub r3, r0, r30 -; CHECK-PWR8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: ld r23, -72(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: srawi r28, r11, 31 -; CHECK-PWR8-NEXT: sub r4, r9, r4 -; CHECK-PWR8-NEXT: sub r10, r10, r12 -; CHECK-PWR8-NEXT: vmrghb v3, v5, v3 -; CHECK-PWR8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: srawi r29, r7, 31 -; CHECK-PWR8-NEXT: srawi r9, r3, 31 -; CHECK-PWR8-NEXT: mtvsrd v5, r4 -; CHECK-PWR8-NEXT: xor r4, r11, r28 -; CHECK-PWR8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: mtvsrd v1, r10 -; CHECK-PWR8-NEXT: ld r10, -160(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: rldicl r31, r26, 24, 56 -; CHECK-PWR8-NEXT: rldicl r2, r24, 24, 56 -; CHECK-PWR8-NEXT: xor r7, r7, r29 -; CHECK-PWR8-NEXT: xor r3, r3, r9 -; CHECK-PWR8-NEXT: rldicl r8, r24, 16, 56 -; CHECK-PWR8-NEXT: rldicl r6, r26, 8, 56 -; CHECK-PWR8-NEXT: sub r4, r4, r28 -; CHECK-PWR8-NEXT: clrlwi r0, r31, 24 -; CHECK-PWR8-NEXT: clrlwi r30, r2, 24 -; CHECK-PWR8-NEXT: sub r7, r7, r29 -; CHECK-PWR8-NEXT: rldicl r5, r24, 8, 56 +; CHECK-PWR8-NEXT: clrlwi r9, r9, 24 ; CHECK-PWR8-NEXT: clrlwi r10, r10, 24 +; CHECK-PWR8-NEXT: clrlwi r5, r27, 24 +; CHECK-PWR8-NEXT: clrlwi r6, r21, 24 +; CHECK-PWR8-NEXT: rldicl r2, r24, 24, 56 +; CHECK-PWR8-NEXT: rldicl r21, r24, 16, 56 +; CHECK-PWR8-NEXT: clrlwi r25, r25, 24 +; CHECK-PWR8-NEXT: clrlwi r28, r28, 24 +; CHECK-PWR8-NEXT: clrlwi 
r30, r30, 24 +; CHECK-PWR8-NEXT: std r17, -120(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r27, r24, 8, 56 +; CHECK-PWR8-NEXT: clrlwi r24, r4, 24 +; CHECK-PWR8-NEXT: std r19, -104(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: clrlwi r8, r8, 24 -; CHECK-PWR8-NEXT: sub r3, r3, r9 -; CHECK-PWR8-NEXT: mtvsrd v7, r4 -; CHECK-PWR8-NEXT: clrlwi r4, r6, 24 -; CHECK-PWR8-NEXT: clrlwi r5, r5, 24 -; CHECK-PWR8-NEXT: sub r0, r0, r30 -; CHECK-PWR8-NEXT: mtvsrd v6, r7 -; CHECK-PWR8-NEXT: sub r7, r10, r8 +; CHECK-PWR8-NEXT: rldicl r31, r26, 56, 56 +; CHECK-PWR8-NEXT: rldicl r19, r26, 48, 56 +; CHECK-PWR8-NEXT: clrlwi r3, r3, 24 +; CHECK-PWR8-NEXT: rldicl r17, r26, 40, 56 +; CHECK-PWR8-NEXT: sub r9, r9, r10 +; CHECK-PWR8-NEXT: sub r5, r0, r5 +; CHECK-PWR8-NEXT: sub r11, r11, r7 +; CHECK-PWR8-NEXT: sub r7, r24, r25 +; CHECK-PWR8-NEXT: sub r30, r28, r30 +; CHECK-PWR8-NEXT: clrlwi r23, r23, 24 +; CHECK-PWR8-NEXT: clrlwi r22, r22, 24 +; CHECK-PWR8-NEXT: sub r8, r29, r8 +; CHECK-PWR8-NEXT: sub r4, r6, r3 +; CHECK-PWR8-NEXT: srawi r29, r9, 31 +; CHECK-PWR8-NEXT: srawi r25, r5, 31 +; CHECK-PWR8-NEXT: sub r23, r23, r22 +; CHECK-PWR8-NEXT: clrlwi r22, r31, 24 +; CHECK-PWR8-NEXT: ld r31, -200(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: clrlwi r19, r19, 24 +; CHECK-PWR8-NEXT: srawi r0, r7, 31 +; CHECK-PWR8-NEXT: srawi r3, r30, 31 +; CHECK-PWR8-NEXT: clrlwi r18, r18, 24 +; CHECK-PWR8-NEXT: clrlwi r17, r17, 24 +; CHECK-PWR8-NEXT: clrlwi r16, r16, 24 +; CHECK-PWR8-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r20, -96(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: rldicl r15, r26, 32, 56 +; CHECK-PWR8-NEXT: rldicl r20, r26, 16, 56 +; CHECK-PWR8-NEXT: sub r19, r19, r18 +; CHECK-PWR8-NEXT: sub r18, r17, r16 +; CHECK-PWR8-NEXT: rldicl r26, r26, 8, 56 +; CHECK-PWR8-NEXT: srawi r6, r8, 31 +; CHECK-PWR8-NEXT: ld r16, -192(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: xor r9, r9, r29 +; CHECK-PWR8-NEXT: srawi 
r28, r11, 31 +; CHECK-PWR8-NEXT: xor r5, r5, r25 +; CHECK-PWR8-NEXT: xor r7, r7, r0 +; CHECK-PWR8-NEXT: xor r30, r30, r3 +; CHECK-PWR8-NEXT: clrlwi r31, r31, 24 +; CHECK-PWR8-NEXT: srawi r10, r12, 31 +; CHECK-PWR8-NEXT: clrlwi r15, r15, 24 +; CHECK-PWR8-NEXT: clrlwi r14, r14, 24 +; CHECK-PWR8-NEXT: clrlwi r20, r20, 24 +; CHECK-PWR8-NEXT: clrlwi r21, r21, 24 +; CHECK-PWR8-NEXT: clrlwi r26, r26, 24 +; CHECK-PWR8-NEXT: clrlwi r27, r27, 24 +; CHECK-PWR8-NEXT: xor r8, r8, r6 +; CHECK-PWR8-NEXT: sub r9, r9, r29 +; CHECK-PWR8-NEXT: sub r5, r5, r25 +; CHECK-PWR8-NEXT: xor r11, r11, r28 +; CHECK-PWR8-NEXT: sub r7, r7, r0 +; CHECK-PWR8-NEXT: sub r3, r30, r3 +; CHECK-PWR8-NEXT: sub r22, r22, r31 +; CHECK-PWR8-NEXT: sub r17, r15, r14 +; CHECK-PWR8-NEXT: clrlwi r16, r16, 24 +; CHECK-PWR8-NEXT: clrlwi r15, r2, 24 +; CHECK-PWR8-NEXT: xor r12, r12, r10 +; CHECK-PWR8-NEXT: sub r21, r20, r21 +; CHECK-PWR8-NEXT: sub r27, r26, r27 +; CHECK-PWR8-NEXT: sub r6, r8, r6 +; CHECK-PWR8-NEXT: sub r11, r11, r28 +; CHECK-PWR8-NEXT: slwi r7, r7, 16 +; CHECK-PWR8-NEXT: slwi r3, r3, 24 +; CHECK-PWR8-NEXT: srawi r8, r19, 31 +; CHECK-PWR8-NEXT: slwi r9, r9, 16 +; CHECK-PWR8-NEXT: slwi r5, r5, 24 +; CHECK-PWR8-NEXT: sub r16, r16, r15 +; CHECK-PWR8-NEXT: sub r10, r12, r10 +; CHECK-PWR8-NEXT: or r3, r3, r7 +; CHECK-PWR8-NEXT: srawi r24, r4, 31 +; CHECK-PWR8-NEXT: srawi r20, r22, 31 +; CHECK-PWR8-NEXT: or r5, r5, r9 +; CHECK-PWR8-NEXT: slwi r9, r11, 8 +; CHECK-PWR8-NEXT: slwi r6, r6, 8 +; CHECK-PWR8-NEXT: xor r11, r19, r8 +; CHECK-PWR8-NEXT: srawi r12, r18, 31 +; CHECK-PWR8-NEXT: srawi r0, r21, 31 +; CHECK-PWR8-NEXT: xor r4, r4, r24 +; CHECK-PWR8-NEXT: xor r28, r22, r20 +; CHECK-PWR8-NEXT: sub r8, r11, r8 +; CHECK-PWR8-NEXT: or r3, r3, r9 +; CHECK-PWR8-NEXT: srawi r30, r27, 31 +; CHECK-PWR8-NEXT: srawi r25, r16, 31 +; CHECK-PWR8-NEXT: or r5, r5, r6 +; CHECK-PWR8-NEXT: xor r6, r18, r12 +; CHECK-PWR8-NEXT: xor r9, r21, r0 +; CHECK-PWR8-NEXT: sub r4, r4, r24 +; CHECK-PWR8-NEXT: xor r11, r27, r30 +; 
CHECK-PWR8-NEXT: sub r7, r28, r20 +; CHECK-PWR8-NEXT: xor r28, r16, r25 +; CHECK-PWR8-NEXT: sub r6, r6, r12 +; CHECK-PWR8-NEXT: sub r9, r9, r0 +; CHECK-PWR8-NEXT: or r3, r3, r4 +; CHECK-PWR8-NEXT: sub r11, r11, r30 +; CHECK-PWR8-NEXT: srawi r26, r23, 31 +; CHECK-PWR8-NEXT: or r4, r5, r10 +; CHECK-PWR8-NEXT: sub r5, r28, r25 +; CHECK-PWR8-NEXT: slwi r9, r9, 16 +; CHECK-PWR8-NEXT: slwi r8, r8, 16 +; CHECK-PWR8-NEXT: srawi r29, r17, 31 +; CHECK-PWR8-NEXT: slwi r10, r11, 24 +; CHECK-PWR8-NEXT: slwi r6, r6, 24 +; CHECK-PWR8-NEXT: slwi r5, r5, 8 +; CHECK-PWR8-NEXT: slwi r7, r7, 8 ; CHECK-PWR8-NEXT: ld r2, -152(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-PWR8-NEXT: xor r3, r23, r26 +; CHECK-PWR8-NEXT: xor r11, r17, r29 +; CHECK-PWR8-NEXT: or r9, r10, r9 +; CHECK-PWR8-NEXT: or r6, r6, r8 ; CHECK-PWR8-NEXT: ld r31, -8(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: mtvsrd v8, r3 -; CHECK-PWR8-NEXT: sub r3, r4, r5 -; CHECK-PWR8-NEXT: srawi r12, r0, 31 +; CHECK-PWR8-NEXT: sub r3, r3, r26 +; CHECK-PWR8-NEXT: sub r8, r11, r29 +; CHECK-PWR8-NEXT: or r5, r9, r5 +; CHECK-PWR8-NEXT: or r6, r6, r7 +; CHECK-PWR8-NEXT: mtfprd f0, r4 ; CHECK-PWR8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: or r5, r5, r8 +; CHECK-PWR8-NEXT: or r3, r6, r3 ; CHECK-PWR8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: srawi r6, r7, 31 -; CHECK-PWR8-NEXT: srawi r5, r3, 31 -; CHECK-PWR8-NEXT: xor r8, r0, r12 -; CHECK-PWR8-NEXT: vmrghb v5, v1, v5 +; CHECK-PWR8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: rldimi r3, r5, 32, 0 +; CHECK-PWR8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r24, -64(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: xor r4, r7, r6 -; CHECK-PWR8-NEXT: xor r3, r3, r5 -; CHECK-PWR8-NEXT: sub r8, r8, r12 -; CHECK-PWR8-NEXT: vmrghb v6, v7, v6 +; CHECK-PWR8-NEXT: ld r23, 
-72(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: mtfprd f1, r3 +; CHECK-PWR8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r17, -120(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r16, -128(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: sub r4, r4, r6 -; CHECK-PWR8-NEXT: sub r3, r3, r5 -; CHECK-PWR8-NEXT: mtvsrd v9, r8 ; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: mtvsrd v1, r4 -; CHECK-PWR8-NEXT: mtvsrd v7, r3 -; CHECK-PWR8-NEXT: vmrghb v8, v9, v8 -; CHECK-PWR8-NEXT: vmrghb v1, v7, v1 -; CHECK-PWR8-NEXT: vmrglh v2, v4, v2 -; CHECK-PWR8-NEXT: vmrglh v3, v3, v0 -; CHECK-PWR8-NEXT: vmrglh v4, v6, v5 -; CHECK-PWR8-NEXT: vmrglh v5, v1, v8 -; CHECK-PWR8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-PWR8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-PWR8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-PWR8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-PWR8-NEXT: blr ; ; CHECK-PWR7-LABEL: sub_absv_8_ext: ; CHECK-PWR7: # %bb.0: # %entry -; CHECK-PWR7-NEXT: stdu r1, -416(r1) -; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 416 -; CHECK-PWR7-NEXT: .cfi_offset r23, -72 -; CHECK-PWR7-NEXT: .cfi_offset r24, -64 -; CHECK-PWR7-NEXT: .cfi_offset r25, -56 -; CHECK-PWR7-NEXT: .cfi_offset r26, -48 -; CHECK-PWR7-NEXT: .cfi_offset r27, -40 -; CHECK-PWR7-NEXT: .cfi_offset r28, -32 -; CHECK-PWR7-NEXT: .cfi_offset r29, -24 -; CHECK-PWR7-NEXT: .cfi_offset r30, -16 -; CHECK-PWR7-NEXT: addi r3, r1, 304 -; CHECK-PWR7-NEXT: std r23, 344(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: addi r4, r1, 320 -; CHECK-PWR7-NEXT: std r24, 352(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r25, 360(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r26, 368(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std 
r27, 376(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r28, 384(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r29, 392(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r30, 400(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: addi r3, r1, -112 +; CHECK-PWR7-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: addi r4, r1, -96 +; CHECK-PWR7-NEXT: std r24, -64(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR7-NEXT: stxvw4x v2, 0, r3 -; CHECK-PWR7-NEXT: lbz r3, 304(r1) +; CHECK-PWR7-NEXT: lbz r3, -112(r1) ; CHECK-PWR7-NEXT: stxvw4x v3, 0, r4 -; CHECK-PWR7-NEXT: lbz r9, 307(r1) -; CHECK-PWR7-NEXT: lbz r10, 323(r1) -; CHECK-PWR7-NEXT: lbz r11, 308(r1) -; CHECK-PWR7-NEXT: lbz r12, 324(r1) -; CHECK-PWR7-NEXT: lbz r0, 309(r1) -; CHECK-PWR7-NEXT: lbz r30, 325(r1) +; CHECK-PWR7-NEXT: lbz r9, -109(r1) +; CHECK-PWR7-NEXT: lbz r10, -93(r1) +; CHECK-PWR7-NEXT: lbz r11, -108(r1) +; CHECK-PWR7-NEXT: lbz r12, -92(r1) +; CHECK-PWR7-NEXT: lbz r0, -107(r1) +; CHECK-PWR7-NEXT: lbz r30, -91(r1) ; CHECK-PWR7-NEXT: sub r9, r9, r10 -; CHECK-PWR7-NEXT: lbz r29, 310(r1) -; CHECK-PWR7-NEXT: lbz r28, 326(r1) +; CHECK-PWR7-NEXT: lbz r29, -106(r1) +; CHECK-PWR7-NEXT: lbz r28, -90(r1) ; CHECK-PWR7-NEXT: sub r11, r11, r12 -; CHECK-PWR7-NEXT: lbz r27, 311(r1) -; CHECK-PWR7-NEXT: lbz r26, 327(r1) +; CHECK-PWR7-NEXT: lbz r10, -101(r1) +; CHECK-PWR7-NEXT: lbz r12, -85(r1) ; CHECK-PWR7-NEXT: sub r0, r0, r30 -; CHECK-PWR7-NEXT: lbz r25, 312(r1) -; CHECK-PWR7-NEXT: lbz r24, 328(r1) +; CHECK-PWR7-NEXT: lbz r30, -100(r1) ; CHECK-PWR7-NEXT: sub r29, r29, r28 -; CHECK-PWR7-NEXT: lbz r10, 315(r1) -; CHECK-PWR7-NEXT: lbz r12, 331(r1) -; CHECK-PWR7-NEXT: 
sub r27, r27, r26 -; CHECK-PWR7-NEXT: lbz r30, 316(r1) -; CHECK-PWR7-NEXT: lbz r28, 332(r1) -; CHECK-PWR7-NEXT: sub r25, r25, r24 -; CHECK-PWR7-NEXT: lbz r4, 320(r1) -; CHECK-PWR7-NEXT: lbz r5, 305(r1) +; CHECK-PWR7-NEXT: lbz r28, -84(r1) +; CHECK-PWR7-NEXT: lbz r4, -96(r1) +; CHECK-PWR7-NEXT: lbz r27, -105(r1) ; CHECK-PWR7-NEXT: sub r10, r10, r12 -; CHECK-PWR7-NEXT: lbz r6, 321(r1) -; CHECK-PWR7-NEXT: lbz r26, 317(r1) +; CHECK-PWR7-NEXT: lbz r26, -89(r1) +; CHECK-PWR7-NEXT: lbz r25, -104(r1) ; CHECK-PWR7-NEXT: sub r30, r30, r28 -; CHECK-PWR7-NEXT: lbz r24, 333(r1) -; CHECK-PWR7-NEXT: lbz r12, 319(r1) +; CHECK-PWR7-NEXT: lbz r24, -88(r1) +; CHECK-PWR7-NEXT: lbz r5, -111(r1) ; CHECK-PWR7-NEXT: sub r3, r3, r4 -; CHECK-PWR7-NEXT: lbz r28, 335(r1) -; CHECK-PWR7-NEXT: lbz r7, 306(r1) +; CHECK-PWR7-NEXT: lbz r6, -95(r1) +; CHECK-PWR7-NEXT: lbz r12, -97(r1) +; CHECK-PWR7-NEXT: sub r27, r27, r26 +; CHECK-PWR7-NEXT: lbz r28, -81(r1) +; CHECK-PWR7-NEXT: lbz r7, -110(r1) +; CHECK-PWR7-NEXT: sub r25, r25, r24 +; CHECK-PWR7-NEXT: lbz r8, -94(r1) +; CHECK-PWR7-NEXT: lbz r26, -99(r1) ; CHECK-PWR7-NEXT: sub r5, r5, r6 -; CHECK-PWR7-NEXT: lbz r8, 322(r1) -; CHECK-PWR7-NEXT: sub r26, r26, r24 -; CHECK-PWR7-NEXT: srawi r24, r5, 31 -; CHECK-PWR7-NEXT: lbz r23, 313(r1) +; CHECK-PWR7-NEXT: lbz r24, -83(r1) ; CHECK-PWR7-NEXT: sub r12, r12, r28 ; CHECK-PWR7-NEXT: srawi r28, r3, 31 -; CHECK-PWR7-NEXT: xor r5, r5, r24 -; CHECK-PWR7-NEXT: lbz r4, 329(r1) +; CHECK-PWR7-NEXT: lbz r23, -103(r1) ; CHECK-PWR7-NEXT: sub r7, r7, r8 ; CHECK-PWR7-NEXT: xor r3, r3, r28 -; CHECK-PWR7-NEXT: lbz r6, 314(r1) -; CHECK-PWR7-NEXT: lbz r8, 330(r1) +; CHECK-PWR7-NEXT: lbz r4, -87(r1) +; CHECK-PWR7-NEXT: lbz r6, -102(r1) +; CHECK-PWR7-NEXT: sub r26, r26, r24 +; CHECK-PWR7-NEXT: srawi r24, r5, 31 +; CHECK-PWR7-NEXT: lbz r8, -86(r1) +; CHECK-PWR7-NEXT: xor r5, r5, r24 ; CHECK-PWR7-NEXT: sub r3, r3, r28 ; CHECK-PWR7-NEXT: srawi r28, r7, 31 ; CHECK-PWR7-NEXT: sub r5, r5, r24 @@ -960,19 +947,19 @@ ; CHECK-PWR7-NEXT: 
srawi r28, r29, 31 ; CHECK-PWR7-NEXT: sub r0, r0, r24 ; CHECK-PWR7-NEXT: srawi r24, r27, 31 -; CHECK-PWR7-NEXT: sub r4, r23, r4 ; CHECK-PWR7-NEXT: xor r29, r29, r28 -; CHECK-PWR7-NEXT: lbz r23, 318(r1) +; CHECK-PWR7-NEXT: sub r4, r23, r4 ; CHECK-PWR7-NEXT: xor r27, r27, r24 +; CHECK-PWR7-NEXT: lbz r23, -98(r1) ; CHECK-PWR7-NEXT: sub r29, r29, r28 ; CHECK-PWR7-NEXT: srawi r28, r25, 31 -; CHECK-PWR7-NEXT: sub r27, r27, r24 -; CHECK-PWR7-NEXT: srawi r24, r4, 31 ; CHECK-PWR7-NEXT: sub r6, r6, r8 +; CHECK-PWR7-NEXT: sub r27, r27, r24 ; CHECK-PWR7-NEXT: xor r25, r25, r28 -; CHECK-PWR7-NEXT: lbz r8, 334(r1) -; CHECK-PWR7-NEXT: xor r4, r4, r24 +; CHECK-PWR7-NEXT: lbz r8, -82(r1) +; CHECK-PWR7-NEXT: srawi r24, r4, 31 ; CHECK-PWR7-NEXT: sub r28, r25, r28 +; CHECK-PWR7-NEXT: xor r4, r4, r24 ; CHECK-PWR7-NEXT: srawi r25, r6, 31 ; CHECK-PWR7-NEXT: sub r4, r4, r24 ; CHECK-PWR7-NEXT: srawi r24, r10, 31 @@ -980,91 +967,66 @@ ; CHECK-PWR7-NEXT: xor r10, r10, r24 ; CHECK-PWR7-NEXT: sub r6, r6, r25 ; CHECK-PWR7-NEXT: srawi r25, r30, 31 +; CHECK-PWR7-NEXT: sub r8, r23, r8 +; CHECK-PWR7-NEXT: ld r23, -72(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: sub r10, r10, r24 ; CHECK-PWR7-NEXT: srawi r24, r26, 31 -; CHECK-PWR7-NEXT: sub r8, r23, r8 ; CHECK-PWR7-NEXT: xor r30, r30, r25 -; CHECK-PWR7-NEXT: ld r23, 344(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: xor r26, r26, r24 ; CHECK-PWR7-NEXT: sub r30, r30, r25 -; CHECK-PWR7-NEXT: srawi r25, r12, 31 +; CHECK-PWR7-NEXT: srawi r25, r8, 31 ; CHECK-PWR7-NEXT: sub r26, r26, r24 -; CHECK-PWR7-NEXT: srawi r24, r8, 31 -; CHECK-PWR7-NEXT: xor r12, r12, r25 -; CHECK-PWR7-NEXT: xor r8, r8, r24 -; CHECK-PWR7-NEXT: sub r12, r12, r25 -; CHECK-PWR7-NEXT: addi r25, r1, 272 -; CHECK-PWR7-NEXT: sub r8, r8, r24 -; CHECK-PWR7-NEXT: stb r12, 288(r1) -; CHECK-PWR7-NEXT: addi r12, r1, 288 -; CHECK-PWR7-NEXT: stb r8, 272(r1) -; CHECK-PWR7-NEXT: stb r26, 256(r1) -; CHECK-PWR7-NEXT: stb r30, 240(r1) -; CHECK-PWR7-NEXT: stb r10, 224(r1) -; CHECK-PWR7-NEXT: stb r6, 
208(r1) -; CHECK-PWR7-NEXT: stb r4, 192(r1) -; CHECK-PWR7-NEXT: stb r28, 176(r1) -; CHECK-PWR7-NEXT: stb r27, 160(r1) -; CHECK-PWR7-NEXT: stb r29, 144(r1) -; CHECK-PWR7-NEXT: stb r0, 128(r1) -; CHECK-PWR7-NEXT: stb r11, 112(r1) -; CHECK-PWR7-NEXT: stb r9, 96(r1) -; CHECK-PWR7-NEXT: stb r7, 80(r1) -; CHECK-PWR7-NEXT: stb r5, 64(r1) -; CHECK-PWR7-NEXT: stb r3, 48(r1) -; CHECK-PWR7-NEXT: addi r8, r1, 256 -; CHECK-PWR7-NEXT: addi r26, r1, 240 -; CHECK-PWR7-NEXT: lxvw4x v2, 0, r12 -; CHECK-PWR7-NEXT: lxvw4x v3, 0, r25 -; CHECK-PWR7-NEXT: addi r10, r1, 224 -; CHECK-PWR7-NEXT: addi r30, r1, 208 -; CHECK-PWR7-NEXT: addi r3, r1, 192 -; CHECK-PWR7-NEXT: addi r4, r1, 176 -; CHECK-PWR7-NEXT: addi r5, r1, 160 -; CHECK-PWR7-NEXT: addi r6, r1, 144 -; CHECK-PWR7-NEXT: lxvw4x v4, 0, r8 -; CHECK-PWR7-NEXT: lxvw4x v5, 0, r26 -; CHECK-PWR7-NEXT: addi r7, r1, 128 -; CHECK-PWR7-NEXT: addi r8, r1, 112 -; CHECK-PWR7-NEXT: lxvw4x v0, 0, r10 -; CHECK-PWR7-NEXT: lxvw4x v1, 0, r30 -; CHECK-PWR7-NEXT: vmrghb v2, v3, v2 -; CHECK-PWR7-NEXT: addi r9, r1, 96 -; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3 -; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4 -; CHECK-PWR7-NEXT: addi r3, r1, 80 -; CHECK-PWR7-NEXT: addi r4, r1, 64 -; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5 -; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6 -; CHECK-PWR7-NEXT: addi r5, r1, 48 -; CHECK-PWR7-NEXT: vmrghb v4, v5, v4 -; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7 -; CHECK-PWR7-NEXT: lxvw4x v9, 0, r8 -; CHECK-PWR7-NEXT: vmrghb v0, v1, v0 -; CHECK-PWR7-NEXT: lxvw4x v1, 0, r9 -; CHECK-PWR7-NEXT: lxvw4x v10, 0, r3 -; CHECK-PWR7-NEXT: vmrghb v6, v7, v6 -; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4 -; CHECK-PWR7-NEXT: vmrghb v3, v8, v3 -; CHECK-PWR7-NEXT: lxvw4x v8, 0, r5 -; CHECK-PWR7-NEXT: vmrghb v5, v9, v5 -; CHECK-PWR7-NEXT: ld r30, 400(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r29, 392(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: vmrghb v1, v10, v1 -; CHECK-PWR7-NEXT: ld r28, 384(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r27, 376(r1) # 8-byte Folded Reload -; 
CHECK-PWR7-NEXT: vmrghb v7, v8, v7 -; CHECK-PWR7-NEXT: ld r26, 368(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r25, 360(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: vmrghh v2, v4, v2 -; CHECK-PWR7-NEXT: ld r24, 352(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: vmrghh v4, v6, v0 -; CHECK-PWR7-NEXT: vmrghh v3, v5, v3 -; CHECK-PWR7-NEXT: vmrghh v5, v7, v1 -; CHECK-PWR7-NEXT: xxmrghw vs0, v4, v2 -; CHECK-PWR7-NEXT: xxmrghw vs1, v5, v3 +; CHECK-PWR7-NEXT: xor r8, r8, r25 +; CHECK-PWR7-NEXT: srawi r24, r12, 31 +; CHECK-PWR7-NEXT: sub r8, r8, r25 +; CHECK-PWR7-NEXT: slwi r26, r26, 16 +; CHECK-PWR7-NEXT: xor r12, r12, r24 +; CHECK-PWR7-NEXT: addi r25, r1, -128 +; CHECK-PWR7-NEXT: slwi r30, r30, 24 +; CHECK-PWR7-NEXT: slwi r4, r4, 16 +; CHECK-PWR7-NEXT: slwi r28, r28, 24 +; CHECK-PWR7-NEXT: slwi r0, r0, 16 +; CHECK-PWR7-NEXT: or r30, r30, r26 +; CHECK-PWR7-NEXT: addi r26, r1, -144 +; CHECK-PWR7-NEXT: slwi r11, r11, 24 +; CHECK-PWR7-NEXT: slwi r5, r5, 16 +; CHECK-PWR7-NEXT: or r4, r28, r4 +; CHECK-PWR7-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: slwi r3, r3, 24 +; CHECK-PWR7-NEXT: slwi r8, r8, 8 +; CHECK-PWR7-NEXT: or r11, r11, r0 +; CHECK-PWR7-NEXT: slwi r6, r6, 8 +; CHECK-PWR7-NEXT: slwi r29, r29, 8 +; CHECK-PWR7-NEXT: or r3, r3, r5 +; CHECK-PWR7-NEXT: or r5, r30, r8 +; CHECK-PWR7-NEXT: slwi r7, r7, 8 +; CHECK-PWR7-NEXT: sub r12, r12, r24 +; CHECK-PWR7-NEXT: or r4, r4, r6 +; CHECK-PWR7-NEXT: or r6, r11, r29 +; CHECK-PWR7-NEXT: or r3, r3, r7 +; CHECK-PWR7-NEXT: or r5, r5, r12 +; CHECK-PWR7-NEXT: or r4, r4, r10 +; CHECK-PWR7-NEXT: or r6, r6, r27 +; CHECK-PWR7-NEXT: or r3, r3, r9 +; CHECK-PWR7-NEXT: stw r5, -128(r1) +; CHECK-PWR7-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: stw r4, -144(r1) +; CHECK-PWR7-NEXT: stw r6, -160(r1) +; CHECK-PWR7-NEXT: stw r3, -176(r1) +; CHECK-PWR7-NEXT: addi r3, r1, -160 +; CHECK-PWR7-NEXT: addi r4, r1, -176 +; CHECK-PWR7-NEXT: lxvw4x vs0, 0, r25 +; CHECK-PWR7-NEXT: lxvw4x vs1, 0, r26 +; 
CHECK-PWR7-NEXT: lxvw4x vs2, 0, r3 +; CHECK-PWR7-NEXT: lxvw4x vs3, 0, r4 +; CHECK-PWR7-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-PWR7-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: xxmrghw vs1, vs3, vs2 +; CHECK-PWR7-NEXT: ld r24, -64(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: xxmrghd v2, vs1, vs0 -; CHECK-PWR7-NEXT: addi r1, r1, 416 ; CHECK-PWR7-NEXT: blr entry: %vecext = extractelement <16 x i8> %a, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -13,40 +13,38 @@ ; LE-NEXT: xxland 35, 35, 0 ; LE-NEXT: vcmpequw 2, 2, 4 ; LE-NEXT: vcmpequw 3, 3, 4 -; LE-NEXT: xxswapd 0, 34 +; LE-NEXT: xxsldwi 0, 34, 34, 3 +; LE-NEXT: xxswapd 1, 34 +; LE-NEXT: xxsldwi 2, 34, 34, 1 +; LE-NEXT: xxswapd 3, 35 ; LE-NEXT: mfvsrwz 3, 34 -; LE-NEXT: xxsldwi 1, 34, 34, 1 -; LE-NEXT: mfvsrwz 4, 35 -; LE-NEXT: xxsldwi 2, 34, 34, 3 -; LE-NEXT: mtvsrd 36, 3 -; LE-NEXT: mffprwz 3, 0 -; LE-NEXT: xxswapd 0, 35 -; LE-NEXT: mtvsrd 37, 4 -; LE-NEXT: mffprwz 4, 1 -; LE-NEXT: xxsldwi 1, 35, 35, 1 -; LE-NEXT: mtvsrd 34, 3 -; LE-NEXT: mffprwz 3, 2 -; LE-NEXT: mtvsrd 32, 4 -; LE-NEXT: mffprwz 4, 0 +; LE-NEXT: mffprwz 5, 0 ; LE-NEXT: xxsldwi 0, 35, 35, 3 -; LE-NEXT: mtvsrd 33, 3 -; LE-NEXT: mffprwz 3, 1 -; LE-NEXT: mtvsrd 38, 4 -; LE-NEXT: mtvsrd 35, 3 -; LE-NEXT: mffprwz 3, 0 -; LE-NEXT: vmrghh 2, 0, 2 -; LE-NEXT: mtvsrd 32, 3 -; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; LE-NEXT: vmrghh 4, 1, 4 -; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; LE-NEXT: vmrghh 3, 3, 6 -; LE-NEXT: lxvd2x 2, 0, 3 -; LE-NEXT: vmrghh 5, 0, 5 -; LE-NEXT: xxmrglw 0, 36, 34 -; LE-NEXT: xxmrglw 1, 37, 35 -; LE-NEXT: xxswapd 35, 2 -; LE-NEXT: xxmrgld 34, 1, 0 -; LE-NEXT: xxlor 34, 34, 35 +; LE-NEXT: mffprwz 6, 1 +; 
LE-NEXT: xxsldwi 1, 35, 35, 1 +; LE-NEXT: mffprwz 7, 2 +; LE-NEXT: slwi 5, 5, 16 +; LE-NEXT: mffprwz 8, 0 +; LE-NEXT: or 3, 5, 3 +; LE-NEXT: mffprwz 9, 1 +; LE-NEXT: slwi 7, 7, 16 +; LE-NEXT: mfvsrwz 4, 35 +; LE-NEXT: or 5, 7, 6 +; LE-NEXT: addis 6, 2, .LCPI0_1@toc@ha +; LE-NEXT: slwi 7, 8, 16 +; LE-NEXT: mffprwz 10, 3 +; LE-NEXT: slwi 8, 9, 16 +; LE-NEXT: rldimi 5, 3, 32, 0 +; LE-NEXT: addi 3, 6, .LCPI0_1@toc@l +; LE-NEXT: or 4, 7, 4 +; LE-NEXT: lxvd2x 0, 0, 3 +; LE-NEXT: mtfprd 1, 5 +; LE-NEXT: or 7, 8, 10 +; LE-NEXT: rldimi 7, 4, 32, 0 +; LE-NEXT: mtfprd 2, 7 +; LE-NEXT: xxswapd 34, 0 +; LE-NEXT: xxmrghd 1, 2, 1 +; LE-NEXT: xxlor 34, 1, 34 ; LE-NEXT: blr ; ; BE-LABEL: pr25080: @@ -59,42 +57,37 @@ ; BE-NEXT: xxland 34, 34, 0 ; BE-NEXT: vcmpequw 3, 3, 4 ; BE-NEXT: vcmpequw 2, 2, 4 -; BE-NEXT: xxswapd 0, 35 +; BE-NEXT: xxsldwi 0, 35, 35, 3 +; BE-NEXT: xxswapd 1, 35 +; BE-NEXT: xxsldwi 2, 35, 35, 1 +; BE-NEXT: xxswapd 3, 34 ; BE-NEXT: mfvsrwz 3, 35 -; BE-NEXT: xxsldwi 1, 35, 35, 1 -; BE-NEXT: mfvsrwz 4, 34 -; BE-NEXT: mtvsrwz 36, 3 -; BE-NEXT: xxsldwi 2, 35, 35, 3 -; BE-NEXT: mffprwz 3, 0 -; BE-NEXT: xxswapd 0, 34 -; BE-NEXT: mtvsrwz 35, 4 -; BE-NEXT: mffprwz 4, 1 +; BE-NEXT: mffprwz 5, 0 +; BE-NEXT: xxsldwi 0, 34, 34, 3 +; BE-NEXT: mffprwz 6, 1 ; BE-NEXT: xxsldwi 1, 34, 34, 1 -; BE-NEXT: mtvsrwz 37, 3 +; BE-NEXT: mffprwz 7, 2 +; BE-NEXT: slwi 5, 5, 16 +; BE-NEXT: mffprwz 8, 0 +; BE-NEXT: or 3, 5, 3 +; BE-NEXT: mffprwz 9, 1 +; BE-NEXT: slwi 7, 7, 16 +; BE-NEXT: mfvsrwz 4, 34 +; BE-NEXT: or 5, 7, 6 +; BE-NEXT: slwi 6, 8, 16 +; BE-NEXT: mffprwz 10, 3 +; BE-NEXT: slwi 7, 9, 16 +; BE-NEXT: rldimi 5, 3, 32, 0 ; BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; BE-NEXT: or 4, 6, 4 +; BE-NEXT: mtfprd 0, 5 ; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; BE-NEXT: mtvsrwz 32, 4 -; BE-NEXT: mffprwz 4, 0 -; BE-NEXT: lxvw4x 33, 0, 3 -; BE-NEXT: xxsldwi 0, 34, 34, 3 -; BE-NEXT: mffprwz 3, 1 -; BE-NEXT: mffprwz 5, 2 -; BE-NEXT: vperm 2, 0, 5, 1 -; BE-NEXT: mtvsrwz 37, 3 -; BE-NEXT: mffprwz 3, 0 -; 
BE-NEXT: mtvsrwz 38, 5 -; BE-NEXT: mtvsrwz 39, 4 -; BE-NEXT: mtvsrwz 32, 3 -; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha -; BE-NEXT: vperm 4, 6, 4, 1 -; BE-NEXT: addi 3, 3, .LCPI0_2@toc@l -; BE-NEXT: vperm 5, 5, 7, 1 -; BE-NEXT: vperm 3, 0, 3, 1 -; BE-NEXT: xxmrghw 0, 36, 34 -; BE-NEXT: xxmrghw 1, 35, 37 -; BE-NEXT: xxmrghd 34, 1, 0 -; BE-NEXT: lxvw4x 0, 0, 3 -; BE-NEXT: xxlor 34, 34, 0 +; BE-NEXT: or 6, 7, 10 +; BE-NEXT: rldimi 6, 4, 32, 0 +; BE-NEXT: mtfprd 1, 6 +; BE-NEXT: xxmrghd 0, 1, 0 +; BE-NEXT: lxvw4x 1, 0, 3 +; BE-NEXT: xxlor 34, 0, 1 ; BE-NEXT: blr entry: %0 = trunc <8 x i32> %a to <8 x i23> diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -240,79 +240,18 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; P9LE-LABEL: test16: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: sldi 4, 4, 1 -; P9LE-NEXT: li 7, 16 -; P9LE-NEXT: add 6, 3, 4 -; P9LE-NEXT: lxsihzx 4, 3, 4 -; P9LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha -; P9LE-NEXT: lxsihzx 2, 6, 7 -; P9LE-NEXT: li 6, 0 -; P9LE-NEXT: addi 3, 3, .LCPI2_0@toc@l -; P9LE-NEXT: mtvsrd 3, 6 -; P9LE-NEXT: lxv 0, 0(3) -; P9LE-NEXT: li 3, 0 -; P9LE-NEXT: vmrghh 4, 3, 4 -; P9LE-NEXT: vmrghh 2, 3, 2 -; P9LE-NEXT: vsplth 3, 3, 3 -; P9LE-NEXT: xxmrglw 3, 4, 3 -; P9LE-NEXT: xxperm 3, 2, 0 -; P9LE-NEXT: xxspltw 2, 3, 2 -; P9LE-NEXT: vadduwm 2, 3, 2 -; P9LE-NEXT: vextuwrx 3, 3, 2 ; P9LE-NEXT: cmpw 3, 5 ; P9LE-NEXT: bgelr+ 0 ; P9LE-NEXT: # %bb.1: # %if.then ; ; P9BE-LABEL: test16: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: sldi 4, 4, 1 -; P9BE-NEXT: li 7, 16 -; P9BE-NEXT: add 6, 3, 4 -; P9BE-NEXT: lxsihzx 1, 3, 4 -; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha -; P9BE-NEXT: lxsihzx 2, 6, 7 -; P9BE-NEXT: addis 6, 2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l -; P9BE-NEXT: addi 6, 6, .LCPI2_0@toc@l -; P9BE-NEXT: lxv 0, 0(6) -; P9BE-NEXT: li 6, 
0 -; P9BE-NEXT: mtvsrwz 3, 6 -; P9BE-NEXT: xxperm 2, 3, 0 -; P9BE-NEXT: xxperm 1, 3, 0 -; P9BE-NEXT: vsplth 3, 3, 3 -; P9BE-NEXT: lxv 0, 0(3) -; P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxmrghw 3, 3, 1 -; P9BE-NEXT: xxperm 2, 3, 0 -; P9BE-NEXT: xxspltw 3, 2, 1 -; P9BE-NEXT: vadduwm 2, 2, 3 -; P9BE-NEXT: vextuwlx 3, 3, 2 ; P9BE-NEXT: cmpw 3, 5 ; P9BE-NEXT: bgelr+ 0 ; P9BE-NEXT: # %bb.1: # %if.then ; ; P9BE-AIX-LABEL: test16: ; P9BE-AIX: # %bb.0: # %entry -; P9BE-AIX-NEXT: sldi 4, 4, 1 -; P9BE-AIX-NEXT: li 7, 16 -; P9BE-AIX-NEXT: add 6, 3, 4 -; P9BE-AIX-NEXT: lxsihzx 1, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1 -; P9BE-AIX-NEXT: lxsihzx 2, 6, 7 -; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0 -; P9BE-AIX-NEXT: lxv 0, 0(6) -; P9BE-AIX-NEXT: li 6, 0 -; P9BE-AIX-NEXT: mtvsrwz 3, 6 -; P9BE-AIX-NEXT: xxperm 2, 3, 0 -; P9BE-AIX-NEXT: xxperm 1, 3, 0 -; P9BE-AIX-NEXT: vsplth 3, 3, 3 -; P9BE-AIX-NEXT: lxv 0, 0(3) -; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxmrghw 3, 3, 1 -; P9BE-AIX-NEXT: xxperm 2, 3, 0 -; P9BE-AIX-NEXT: xxspltw 3, 2, 1 -; P9BE-AIX-NEXT: vadduwm 2, 2, 3 -; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 ; P9BE-AIX-NEXT: cmpw 3, 5 ; P9BE-AIX-NEXT: bgelr+ 0 ; P9BE-AIX-NEXT: # %bb.1: # %if.then @@ -320,24 +259,17 @@ ; P9BE-AIX32-LABEL: test16: ; P9BE-AIX32: # %bb.0: # %entry ; P9BE-AIX32-NEXT: slwi 4, 4, 1 -; P9BE-AIX32-NEXT: li 6, 0 ; P9BE-AIX32-NEXT: lhzux 4, 3, 4 ; P9BE-AIX32-NEXT: lhz 3, 16(3) -; P9BE-AIX32-NEXT: sth 6, -64(1) -; P9BE-AIX32-NEXT: lxv 2, -64(1) -; P9BE-AIX32-NEXT: sth 4, -48(1) -; P9BE-AIX32-NEXT: lxv 4, -48(1) -; P9BE-AIX32-NEXT: sth 3, -32(1) +; P9BE-AIX32-NEXT: stw 4, -48(1) +; P9BE-AIX32-NEXT: lxv 1, -48(1) +; P9BE-AIX32-NEXT: stw 3, -32(1) ; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0 -; P9BE-AIX32-NEXT: lxv 3, -32(1) -; P9BE-AIX32-NEXT: vmrghh 4, 2, 4 +; P9BE-AIX32-NEXT: lxv 2, -32(1) ; P9BE-AIX32-NEXT: lxv 0, 0(3) -; P9BE-AIX32-NEXT: vmrghh 3, 2, 3 -; P9BE-AIX32-NEXT: vsplth 2, 2, 0 -; P9BE-AIX32-NEXT: xxmrghw 2, 2, 4 -; P9BE-AIX32-NEXT: xxperm 3, 2, 0 -; 
P9BE-AIX32-NEXT: xxspltw 2, 3, 1 -; P9BE-AIX32-NEXT: vadduwm 2, 3, 2 +; P9BE-AIX32-NEXT: xxperm 2, 1, 0 +; P9BE-AIX32-NEXT: xxspltw 3, 2, 1 +; P9BE-AIX32-NEXT: vadduwm 2, 2, 3 ; P9BE-AIX32-NEXT: stxv 2, -16(1) ; P9BE-AIX32-NEXT: lwz 3, -16(1) ; P9BE-AIX32-NEXT: cmpw 3, 5 @@ -378,108 +310,35 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; P9LE-LABEL: test8: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: add 6, 3, 4 -; P9LE-NEXT: lxsibzx 2, 3, 4 -; P9LE-NEXT: li 3, 0 -; P9LE-NEXT: mtvsrd 3, 3 -; P9LE-NEXT: li 3, 8 -; P9LE-NEXT: lxsibzx 5, 6, 3 -; P9LE-NEXT: vspltb 4, 3, 7 -; P9LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha -; P9LE-NEXT: vmrghb 2, 3, 2 -; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l -; P9LE-NEXT: vmrglh 2, 2, 4 -; P9LE-NEXT: lxv 0, 0(3) -; P9LE-NEXT: li 3, 0 -; P9LE-NEXT: vmrghb 3, 3, 5 -; P9LE-NEXT: xxmrglw 2, 2, 4 -; P9LE-NEXT: vmrglh 3, 3, 4 -; P9LE-NEXT: xxmrglw 3, 4, 3 -; P9LE-NEXT: xxperm 2, 3, 0 -; P9LE-NEXT: xxspltw 3, 2, 2 -; P9LE-NEXT: vadduwm 2, 2, 3 -; P9LE-NEXT: vextuwrx 3, 3, 2 ; P9LE-NEXT: cmpw 3, 5 ; P9LE-NEXT: bgelr+ 0 ; P9LE-NEXT: # %bb.1: # %if.then ; ; P9BE-LABEL: test8: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: add 6, 3, 4 -; P9BE-NEXT: li 7, 8 -; P9BE-NEXT: lxsibzx 3, 3, 4 -; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha -; P9BE-NEXT: lxsibzx 0, 6, 7 -; P9BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l -; P9BE-NEXT: addi 6, 6, .LCPI3_0@toc@l -; P9BE-NEXT: lxv 1, 0(6) -; P9BE-NEXT: li 6, 0 -; P9BE-NEXT: mtvsrwz 2, 6 -; P9BE-NEXT: xxperm 0, 2, 1 -; P9BE-NEXT: xxperm 3, 2, 1 -; P9BE-NEXT: vspltb 2, 2, 7 -; P9BE-NEXT: vmrghh 3, 3, 2 -; P9BE-NEXT: xxspltw 1, 2, 0 -; P9BE-NEXT: xxmrghw 3, 3, 0 -; P9BE-NEXT: lxv 0, 0(3) -; P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxperm 3, 1, 0 -; P9BE-NEXT: xxspltw 2, 3, 1 -; P9BE-NEXT: vadduwm 2, 3, 2 -; P9BE-NEXT: vextuwlx 3, 3, 2 ; P9BE-NEXT: cmpw 3, 5 ; P9BE-NEXT: bgelr+ 0 ; P9BE-NEXT: # %bb.1: # %if.then ; ; P9BE-AIX-LABEL: test8: ; P9BE-AIX: # %bb.0: # %entry 
-; P9BE-AIX-NEXT: add 6, 3, 4 -; P9BE-AIX-NEXT: li 7, 8 -; P9BE-AIX-NEXT: lxsibzx 3, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1 -; P9BE-AIX-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0 -; P9BE-AIX-NEXT: lxv 1, 0(6) -; P9BE-AIX-NEXT: li 6, 0 -; P9BE-AIX-NEXT: mtvsrwz 2, 6 -; P9BE-AIX-NEXT: xxperm 0, 2, 1 -; P9BE-AIX-NEXT: xxperm 3, 2, 1 -; P9BE-AIX-NEXT: vspltb 2, 2, 7 -; P9BE-AIX-NEXT: vmrghh 3, 3, 2 -; P9BE-AIX-NEXT: xxspltw 1, 2, 0 -; P9BE-AIX-NEXT: xxmrghw 3, 3, 0 -; P9BE-AIX-NEXT: lxv 0, 0(3) -; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxperm 3, 1, 0 -; P9BE-AIX-NEXT: xxspltw 2, 3, 1 -; P9BE-AIX-NEXT: vadduwm 2, 3, 2 -; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 ; P9BE-AIX-NEXT: cmpw 3, 5 ; P9BE-AIX-NEXT: bgelr+ 0 ; P9BE-AIX-NEXT: # %bb.1: # %if.then ; ; P9BE-AIX32-LABEL: test8: ; P9BE-AIX32: # %bb.0: # %entry -; P9BE-AIX32-NEXT: add 6, 3, 4 -; P9BE-AIX32-NEXT: li 7, 8 -; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4 -; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1 -; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0 -; P9BE-AIX32-NEXT: lxv 1, 0(6) -; P9BE-AIX32-NEXT: li 6, 0 -; P9BE-AIX32-NEXT: mtvsrwz 2, 6 -; P9BE-AIX32-NEXT: xxperm 0, 2, 1 -; P9BE-AIX32-NEXT: xxperm 3, 2, 1 -; P9BE-AIX32-NEXT: vspltb 2, 2, 7 -; P9BE-AIX32-NEXT: vmrghh 3, 3, 2 -; P9BE-AIX32-NEXT: xxspltw 1, 2, 0 -; P9BE-AIX32-NEXT: xxmrghw 3, 3, 0 +; P9BE-AIX32-NEXT: lbzux 4, 3, 4 +; P9BE-AIX32-NEXT: lbz 3, 8(3) +; P9BE-AIX32-NEXT: stw 4, -48(1) +; P9BE-AIX32-NEXT: lxv 1, -48(1) +; P9BE-AIX32-NEXT: stw 3, -32(1) +; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.0 +; P9BE-AIX32-NEXT: lxv 2, -32(1) ; P9BE-AIX32-NEXT: lxv 0, 0(3) -; P9BE-AIX32-NEXT: xxperm 3, 1, 0 -; P9BE-AIX32-NEXT: xxspltw 2, 3, 1 -; P9BE-AIX32-NEXT: vadduwm 2, 3, 2 +; P9BE-AIX32-NEXT: xxperm 2, 1, 0 +; P9BE-AIX32-NEXT: xxspltw 3, 2, 1 +; P9BE-AIX32-NEXT: vadduwm 2, 2, 3 ; P9BE-AIX32-NEXT: stxv 2, -16(1) ; P9BE-AIX32-NEXT: lwz 3, -16(1) ; P9BE-AIX32-NEXT: cmpw 3, 5 diff --git 
a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -13,8 +13,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: lis r4, -21386 +; P9LE-NEXT: lis r6, 31710 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: ori r6, r6, 63421 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -23,25 +25,25 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 31710 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: ori r4, r4, 63421 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: sub r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -124 -; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r6, r5, r6 +; P9LE-NEXT: sub r5, r6, r5 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: lis r6, -16728 +; P9LE-NEXT: mulli r5, r5, -124 +; P9LE-NEXT: ori r6, r6, 63249 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 ; P9LE-NEXT: lis r4, 21399 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: ori r4, r4, 33437 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 @@ -49,29 +51,29 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -16728 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 63249 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; 
P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 8 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -1003 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 8 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, -1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: lis r4, 31710 +; P9BE-NEXT: lis r6, -21386 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 63421 +; P9BE-NEXT: ori r6, r6, 37253 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: sub r4, r4, r3 @@ -80,162 +82,154 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, -124 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: mulhw r6, r5, r6 +; P9BE-NEXT: add r5, r6, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: lis r6, 21399 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: ori r6, r6, 33437 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, -16728 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; P9BE-NEXT: ori r4, r4, 63249 -; 
P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, -1003 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 5 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs0, vs1 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: mulhw r5, r5, r6 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srwi r5, r5, 5 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_1: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 21399 -; P8LE-NEXT: lis r8, -16728 -; P8LE-NEXT: lis r9, -21386 -; P8LE-NEXT: lis r10, 31710 -; P8LE-NEXT: ori r3, r3, 33437 -; P8LE-NEXT: ori r8, r8, 63249 -; P8LE-NEXT: ori r9, r9, 37253 -; P8LE-NEXT: ori r10, r10, 63421 +; P8LE-NEXT: lis r3, -16728 +; P8LE-NEXT: lis r9, 31710 +; P8LE-NEXT: lis r8, 21399 +; P8LE-NEXT: lis r11, -21386 +; P8LE-NEXT: ori r3, r3, 63249 +; P8LE-NEXT: ori r9, r9, 63421 +; P8LE-NEXT: ori r8, r8, 33437 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 32, 48 ; P8LE-NEXT: rldicl r6, r4, 16, 48 ; P8LE-NEXT: clrldi r7, r4, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r6, r6 ; 
P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: extsh r10, r6 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r12, r4 +; P8LE-NEXT: mulhw r3, r10, r3 +; P8LE-NEXT: ori r10, r11, 37253 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: mulhw r3, r5, r3 -; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: mulhw r8, r6, r8 -; P8LE-NEXT: mulhw r9, r7, r9 -; P8LE-NEXT: mulhw r10, r4, r10 +; P8LE-NEXT: mulhw r9, r12, r9 +; P8LE-NEXT: mulhw r8, r5, r8 +; P8LE-NEXT: mulhw r10, r7, r10 ; P8LE-NEXT: srwi r11, r3, 31 -; P8LE-NEXT: srawi r3, r3, 5 +; P8LE-NEXT: srwi r3, r3, 8 +; P8LE-NEXT: sub r9, r9, r12 +; P8LE-NEXT: srawi r12, r8, 5 ; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: add r9, r9, r7 -; P8LE-NEXT: srawi r8, r8, 8 -; P8LE-NEXT: sub r10, r10, r4 -; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: srwi r8, r8, 31 +; P8LE-NEXT: add r10, r10, r7 ; P8LE-NEXT: srwi r11, r9, 31 -; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: mulli r3, r3, 98 +; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: add r8, r12, r8 +; P8LE-NEXT: mulli r3, r3, -1003 ; P8LE-NEXT: add r9, r9, r11 ; P8LE-NEXT: srwi r11, r10, 31 ; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: mulli r8, r8, -1003 +; P8LE-NEXT: mulli r8, r8, 98 ; P8LE-NEXT: add r10, r10, r11 -; P8LE-NEXT: mulli r9, r9, 95 -; P8LE-NEXT: mulli r10, r10, -124 -; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: mtvsrd v2, r3 -; P8LE-NEXT: sub r5, r6, r8 -; P8LE-NEXT: sub r3, r7, r9 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v2, v3 +; P8LE-NEXT: mulli r9, r9, -124 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: sub r5, r5, r8 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: or r3, r3, r5 +; P8LE-NEXT: sub r5, r7, r10 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: mtfprwz f0, r3 +; P8LE-NEXT: or r3, r4, r5 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs0, 
vs1 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_srem_vec_1: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, -16728 -; P8BE-NEXT: lis r8, 21399 -; P8BE-NEXT: lis r9, 31710 -; P8BE-NEXT: lis r10, -21386 -; P8BE-NEXT: ori r3, r3, 63249 -; P8BE-NEXT: ori r8, r8, 33437 -; P8BE-NEXT: ori r9, r9, 63421 -; P8BE-NEXT: ori r10, r10, 37253 +; P8BE-NEXT: lis r3, 21399 +; P8BE-NEXT: lis r9, -21386 +; P8BE-NEXT: lis r8, -16728 +; P8BE-NEXT: lis r11, 31710 +; P8BE-NEXT: ori r3, r3, 33437 +; P8BE-NEXT: ori r9, r9, 37253 +; P8BE-NEXT: ori r8, r8, 63249 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r6, r6 ; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r10, r6 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r12, r4 +; P8BE-NEXT: mulhw r3, r10, r3 +; P8BE-NEXT: ori r10, r11, 63421 ; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r8, r6, r8 -; P8BE-NEXT: mulhw r9, r7, r9 -; P8BE-NEXT: mulhw r10, r4, r10 +; P8BE-NEXT: mulhw r9, r12, r9 +; P8BE-NEXT: mulhw r8, r5, r8 +; P8BE-NEXT: mulhw r10, r7, r10 ; P8BE-NEXT: srwi r11, r3, 31 -; P8BE-NEXT: srawi r3, r3, 8 +; P8BE-NEXT: srwi r3, r3, 5 +; P8BE-NEXT: add r9, r9, r12 +; P8BE-NEXT: srawi r12, r8, 8 ; P8BE-NEXT: add r3, r3, r11 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: sub r9, r9, r7 -; P8BE-NEXT: srawi r8, r8, 5 -; P8BE-NEXT: add r10, r10, r4 -; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: sub r10, r10, r7 ; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: srawi r9, r9, 6 -; P8BE-NEXT: mulli r3, r3, -1003 +; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: add r8, r12, r8 +; P8BE-NEXT: mulli r3, r3, 98 ; P8BE-NEXT: add r9, r9, r11 ; P8BE-NEXT: srwi r11, r10, 31 ; P8BE-NEXT: srawi r10, r10, 6 -; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: mulli r8, r8, -1003 ; P8BE-NEXT: add r10, r10, r11 -; P8BE-NEXT: mulli r9, r9, -124 -; P8BE-NEXT: mulli r10, r10, 95 -; 
P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: mulli r9, r9, 95 +; P8BE-NEXT: mulli r10, r10, -124 +; P8BE-NEXT: sub r3, r6, r3 +; P8BE-NEXT: sub r5, r5, r8 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r4, r4, r9 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r5, r7, r10 +; P8BE-NEXT: slwi r4, r4, 16 ; P8BE-NEXT: mtvsrwz v2, r3 -; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: lxvw4x v3, 0, r3 -; P8BE-NEXT: sub r3, r7, r9 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v3, v2 +; P8BE-NEXT: or r3, r4, r5 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -256,21 +250,21 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r5, r3, r4 -; P9LE-NEXT: add r5, r5, r3 -; P9LE-NEXT: srwi r6, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r6 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r6, r5 +; P9LE-NEXT: mulhw r7, r6, r4 +; P9LE-NEXT: add r6, r7, r6 +; P9LE-NEXT: srwi r7, r6, 31 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; P9LE-NEXT: slwi r5, r5, 16 +; P9LE-NEXT: or r3, r5, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 ; P9LE-NEXT: add r5, r5, r3 @@ -279,20 +273,20 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: 
mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r6, r5 +; P9LE-NEXT: mulhw r4, r6, r4 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_2: @@ -309,25 +303,22 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r4 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r6, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r6 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r6, r5 +; P9BE-NEXT: mulhw r7, r6, r4 +; P9BE-NEXT: add r6, r7, r6 +; P9BE-NEXT: srwi r7, r6, 31 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: slwi r5, r5, 16 +; P9BE-NEXT: or r3, r5, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 @@ -335,20 
+326,20 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 -; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r6, r5 +; P9BE-NEXT: mulhw r4, r6, r4 +; P9BE-NEXT: add r4, r4, r6 +; P9BE-NEXT: srwi r6, r4, 31 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_2: @@ -357,49 +348,49 @@ ; P8LE-NEXT: lis r3, -21386 ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: mulhw r8, r5, r3 ; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: extsh r5, r5 +; P8LE-NEXT: extsh r10, r4 +; P8LE-NEXT: mulhw r11, r8, r3 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: mulhw r9, r6, r3 -; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: mulhw r10, r7, r3 -; P8LE-NEXT: mulhw r3, r4, r3 -; P8LE-NEXT: add r8, r8, r5 -; P8LE-NEXT: add r9, r9, r6 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: add r10, r10, r7 -; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: srwi r11, r9, 31 -; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: mulli r8, r8, 95 -; P8LE-NEXT: add r9, r9, r11 -; P8LE-NEXT: srwi r11, r10, 31 -; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: mulli r9, r9, 95 -; 
P8LE-NEXT: add r10, r10, r11 +; P8LE-NEXT: mulhw r9, r5, r3 +; P8LE-NEXT: mulhw r12, r10, r3 +; P8LE-NEXT: mulhw r3, r7, r3 +; P8LE-NEXT: add r8, r11, r8 +; P8LE-NEXT: add r9, r9, r5 +; P8LE-NEXT: add r10, r12, r10 +; P8LE-NEXT: srwi r12, r8, 31 +; P8LE-NEXT: srwi r8, r8, 6 +; P8LE-NEXT: srawi r11, r9, 6 +; P8LE-NEXT: add r3, r3, r7 +; P8LE-NEXT: srwi r9, r9, 31 +; P8LE-NEXT: add r8, r8, r12 +; P8LE-NEXT: srwi r12, r10, 31 +; P8LE-NEXT: srwi r10, r10, 6 +; P8LE-NEXT: add r9, r11, r9 +; P8LE-NEXT: add r10, r10, r12 ; P8LE-NEXT: srwi r11, r3, 31 ; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: mulli r10, r10, 95 -; P8LE-NEXT: sub r5, r5, r8 +; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: mtvsrd v2, r5 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: sub r5, r7, r10 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: sub r4, r4, r10 +; P8LE-NEXT: slwi r6, r6, 16 +; P8LE-NEXT: sub r5, r5, r9 +; P8LE-NEXT: sub r3, r7, r3 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: or r5, r6, r5 +; P8LE-NEXT: or r3, r4, r3 +; P8LE-NEXT: mtfprwz f0, r5 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs1, vs0 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_srem_vec_2: @@ -407,52 +398,49 @@ ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, -21386 ; P8BE-NEXT: ori r3, r3, 37253 -; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: mulhw r8, r5, r3 ; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r8, r6 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r10, r4 +; P8BE-NEXT: mulhw r11, r8, r3 ; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r9, r6, 
r3 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r10, r7, r3 -; P8BE-NEXT: mulhw r3, r4, r3 -; P8BE-NEXT: add r8, r8, r5 -; P8BE-NEXT: add r9, r9, r6 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: srawi r8, r8, 6 -; P8BE-NEXT: add r10, r10, r7 -; P8BE-NEXT: add r3, r3, r4 -; P8BE-NEXT: add r8, r8, r11 -; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: srawi r9, r9, 6 -; P8BE-NEXT: mulli r8, r8, 95 -; P8BE-NEXT: add r9, r9, r11 -; P8BE-NEXT: srwi r11, r10, 31 -; P8BE-NEXT: srawi r10, r10, 6 -; P8BE-NEXT: mulli r9, r9, 95 -; P8BE-NEXT: add r10, r10, r11 +; P8BE-NEXT: mulhw r9, r5, r3 +; P8BE-NEXT: mulhw r12, r10, r3 +; P8BE-NEXT: mulhw r3, r7, r3 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: add r9, r9, r5 +; P8BE-NEXT: add r10, r12, r10 +; P8BE-NEXT: srwi r12, r8, 31 +; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: srawi r11, r9, 6 +; P8BE-NEXT: add r3, r3, r7 +; P8BE-NEXT: srwi r9, r9, 31 +; P8BE-NEXT: add r8, r8, r12 +; P8BE-NEXT: srwi r12, r10, 31 +; P8BE-NEXT: srwi r10, r10, 6 +; P8BE-NEXT: add r9, r11, r9 +; P8BE-NEXT: add r10, r10, r12 ; P8BE-NEXT: srwi r11, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 -; P8BE-NEXT: mulli r10, r10, 95 -; P8BE-NEXT: sub r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha +; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: add r3, r3, r11 -; P8BE-NEXT: mtvsrwz v2, r5 -; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l +; P8BE-NEXT: mulli r10, r10, 95 +; P8BE-NEXT: mulli r9, r9, 95 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: sub r5, r7, r10 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v3, v2 +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: sub r4, r4, r10 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r5, r5, r9 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r5, r6, r5 +; P8BE-NEXT: or r3, r4, r3 +; P8BE-NEXT: mtvsrwz v2, 
r5 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -463,68 +451,68 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-LABEL: combine_srem_sdiv: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, -21386 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 37253 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r5, r3, r4 -; P9LE-NEXT: add r5, r5, r3 -; P9LE-NEXT: srwi r6, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r6 -; P9LE-NEXT: mulli r6, r5, 95 -; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r6, r3 -; P9LE-NEXT: mulhw r7, r6, r4 -; P9LE-NEXT: add r6, r7, r6 -; P9LE-NEXT: srwi r7, r6, 31 -; P9LE-NEXT: srawi r6, r6, 6 -; P9LE-NEXT: add r6, r6, r7 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r7, r3 -; P9LE-NEXT: mulhw r8, r7, r4 -; P9LE-NEXT: add r7, r8, r7 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r6, r4, r5 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: sub r3, r3, r6 +; P9LE-NEXT: li r6, 0 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r6, r6 +; P9LE-NEXT: mulhw r7, r6, r5 +; P9LE-NEXT: add r7, r7, r6 ; P9LE-NEXT: srwi r8, r7, 31 ; P9LE-NEXT: srawi r7, r7, 6 ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: or r3, r3, r6 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r8, r3 -; P9LE-NEXT: mulhw r4, r8, r4 -; P9LE-NEXT: add r4, r4, r8 -; 
P9LE-NEXT: srwi r8, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r8 -; P9LE-NEXT: mulli r8, r4, 95 -; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: extsh r6, r3 +; P9LE-NEXT: mulhw r8, r6, r5 +; P9LE-NEXT: add r6, r8, r6 +; P9LE-NEXT: srwi r8, r6, 31 +; P9LE-NEXT: srawi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r8 +; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: xxmrglw v2, v2, v3 -; P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: xxmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 4 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r9, r8 +; P9LE-NEXT: mulhw r5, r9, r5 +; P9LE-NEXT: add r5, r5, r9 +; P9LE-NEXT: srwi r9, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r9 +; P9LE-NEXT: mulli r9, r5, 95 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: or r3, r3, r8 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: slwi r3, r4, 16 +; P9LE-NEXT: or r3, r3, r7 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 +; P9LE-NEXT: mtfprwz f0, r3 +; P9LE-NEXT: slwi r3, r6, 16 +; P9LE-NEXT: or r3, r3, r5 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v3, vs1, vs0 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: combine_srem_sdiv: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: lis r5, -21386 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 37253 @@ -536,53 +524,50 @@ ; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r6, r3 -; P9BE-NEXT: mulhw r7, r6, r5 -; P9BE-NEXT: add r6, r7, r6 -; P9BE-NEXT: srwi r7, r6, 31 -; P9BE-NEXT: srawi r6, r6, 6 -; P9BE-NEXT: add r6, r6, r7 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, 
r2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r7, r3 -; P9BE-NEXT: xxperm vs0, vs1, vs2 +; P9BE-NEXT: li r6, 6 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r7, r6 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 ; P9BE-NEXT: srawi r7, r7, 6 ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: mulli r8, r7, 95 -; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtfprwz f1, r3 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: or r3, r3, r6 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r5 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r8, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r8 -; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: extsh r6, r3 +; P9BE-NEXT: mulhw r8, r6, r5 +; P9BE-NEXT: add r6, r8, r6 +; P9BE-NEXT: srwi r8, r6, 31 +; P9BE-NEXT: srawi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r8 +; P9BE-NEXT: mulli r8, r6, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: mtfprwz f3, r5 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 -; P9BE-NEXT: mtfprwz f0, r4 -; P9BE-NEXT: mtfprwz f1, r6 -; P9BE-NEXT: xxperm vs0, vs1, vs2 -; P9BE-NEXT: mtfprwz f1, r7 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v3, vs1, vs0 +; P9BE-NEXT: li r8, 2 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r9, r8 +; P9BE-NEXT: mulhw r5, r9, r5 +; P9BE-NEXT: add r5, r5, r9 +; P9BE-NEXT: srwi r9, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r9 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: or r3, r3, r8 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: slwi r3, r4, 16 +; P9BE-NEXT: or r3, r3, r7 +; P9BE-NEXT: vmrgow v2, v2, v3 +; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: slwi r3, r6, 16 +; 
P9BE-NEXT: or r3, r3, r5 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vmrgow v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -590,122 +575,121 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, -21386 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: extsh r8, r5 +; P8LE-NEXT: extsh r6, r6 +; P8LE-NEXT: rldicl r4, r4, 32, 48 ; P8LE-NEXT: extsh r9, r7 -; P8LE-NEXT: mulhw r10, r5, r3 -; P8LE-NEXT: mulhw r11, r8, r3 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: mulhw r12, r9, r3 -; P8LE-NEXT: extsh r0, r4 -; P8LE-NEXT: mulhw r3, r0, r3 -; P8LE-NEXT: add r10, r10, r5 -; P8LE-NEXT: add r8, r11, r8 -; P8LE-NEXT: srwi r11, r10, 31 -; P8LE-NEXT: add r9, r12, r9 -; P8LE-NEXT: srawi r10, r10, 6 +; P8LE-NEXT: mulhw r10, r8, r3 +; P8LE-NEXT: extsh r11, r4 +; P8LE-NEXT: mulhw r12, r6, r3 +; P8LE-NEXT: mulhw r0, r9, r3 +; P8LE-NEXT: mulhw r3, r11, r3 +; P8LE-NEXT: add r8, r10, r8 +; P8LE-NEXT: add r10, r12, r6 +; P8LE-NEXT: add r9, r0, r9 ; P8LE-NEXT: srawi r12, r8, 6 ; P8LE-NEXT: srwi r8, r8, 31 -; P8LE-NEXT: add r10, r10, r11 -; P8LE-NEXT: add r3, r3, r0 -; P8LE-NEXT: srawi r11, r9, 6 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: srawi r11, r10, 6 +; P8LE-NEXT: srawi r0, r9, 6 +; P8LE-NEXT: srwi r10, r10, 31 ; P8LE-NEXT: srwi r9, r9, 31 ; P8LE-NEXT: add r8, r12, r8 -; P8LE-NEXT: mtvsrd v2, r10 +; P8LE-NEXT: srawi r30, r3, 6 +; P8LE-NEXT: srwi r3, r3, 31 +; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: mulli r11, r8, 95 +; P8LE-NEXT: add r3, r30, r3 +; P8LE-NEXT: slwi r8, r8, 16 ; P8LE-NEXT: mulli r12, r10, 95 -; P8LE-NEXT: add r9, r11, r9 -; P8LE-NEXT: srwi r11, r3, 31 -; P8LE-NEXT: 
mtvsrd v3, r8 -; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: mulli r10, r8, 95 -; P8LE-NEXT: mtvsrd v4, r9 -; P8LE-NEXT: add r3, r3, r11 -; P8LE-NEXT: mulli r8, r9, 95 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: mulli r9, r3, 95 -; P8LE-NEXT: sub r5, r5, r12 -; P8LE-NEXT: sub r6, r6, r10 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: mtvsrd v5, r6 -; P8LE-NEXT: sub r5, r7, r8 -; P8LE-NEXT: sub r4, r4, r9 -; P8LE-NEXT: mtvsrd v0, r5 -; P8LE-NEXT: mtvsrd v1, r4 -; P8LE-NEXT: vmrghh v3, v5, v3 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v0, v1, v0 -; P8LE-NEXT: vmrghh v4, v5, v4 -; P8LE-NEXT: xxmrglw v3, v0, v3 -; P8LE-NEXT: xxmrglw v2, v4, v2 -; P8LE-NEXT: vadduhm v2, v3, v2 +; P8LE-NEXT: mulli r0, r9, 95 +; P8LE-NEXT: or r8, r8, r10 +; P8LE-NEXT: mulli r30, r3, 95 +; P8LE-NEXT: mtfprwz f0, r8 +; P8LE-NEXT: sub r5, r5, r11 +; P8LE-NEXT: sub r6, r6, r12 +; P8LE-NEXT: sub r7, r7, r0 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r4, r30 +; P8LE-NEXT: slwi r7, r7, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: slwi r6, r9, 16 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: or r4, r7, r4 +; P8LE-NEXT: or r3, r6, r3 +; P8LE-NEXT: mtfprwz f1, r5 +; P8LE-NEXT: mtfprwz f2, r4 +; P8LE-NEXT: mtfprwz f3, r3 +; P8LE-NEXT: xxmrghw v2, vs2, vs1 +; P8LE-NEXT: xxmrghw v3, vs3, vs0 +; P8LE-NEXT: vadduhm v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: combine_srem_sdiv: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r5, v2 -; P8BE-NEXT: lis r4, -21386 +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -21386 ; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8BE-NEXT: addis r30, r2, .LCPI2_0@toc@ha -; P8BE-NEXT: ori r4, r4, 37253 -; P8BE-NEXT: clrldi r3, r5, 48 -; P8BE-NEXT: rldicl r6, r5, 48, 48 -; P8BE-NEXT: rldicl r7, r5, 32, 48 -; P8BE-NEXT: extsh r8, r3 +; P8BE-NEXT: ori r3, r3, 37253 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r6, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: extsh r8, r5 ; P8BE-NEXT: extsh r9, r6 +; P8BE-NEXT: rldicl r4, r4, 
32, 48 ; P8BE-NEXT: extsh r10, r7 -; P8BE-NEXT: mulhw r11, r8, r4 -; P8BE-NEXT: mulhw r12, r9, r4 -; P8BE-NEXT: rldicl r5, r5, 16, 48 -; P8BE-NEXT: mulhw r0, r10, r4 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: mulhw r4, r5, r4 +; P8BE-NEXT: mulhw r11, r8, r3 +; P8BE-NEXT: extsh r12, r4 +; P8BE-NEXT: mulhw r0, r9, r3 +; P8BE-NEXT: mulhw r30, r10, r3 +; P8BE-NEXT: mulhw r3, r12, r3 ; P8BE-NEXT: add r8, r11, r8 -; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: srwi r11, r8, 31 -; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: srawi r8, r8, 6 -; P8BE-NEXT: addi r0, r30, .LCPI2_0@toc@l -; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: add r9, r0, r9 +; P8BE-NEXT: add r10, r30, r10 +; P8BE-NEXT: srawi r11, r8, 6 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: add r3, r3, r12 ; P8BE-NEXT: srawi r12, r9, 6 +; P8BE-NEXT: srawi r0, r10, 6 ; P8BE-NEXT: srwi r9, r9, 31 -; P8BE-NEXT: add r8, r8, r11 -; P8BE-NEXT: add r4, r4, r5 -; P8BE-NEXT: lxvw4x v2, 0, r0 -; P8BE-NEXT: srawi r11, r10, 6 ; P8BE-NEXT: srwi r10, r10, 31 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: srawi r30, r3, 6 +; P8BE-NEXT: srwi r3, r3, 31 ; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: mtvsrwz v3, r8 -; P8BE-NEXT: mulli r12, r8, 95 -; P8BE-NEXT: add r10, r11, r10 -; P8BE-NEXT: srwi r11, r4, 31 -; P8BE-NEXT: mtvsrwz v4, r9 -; P8BE-NEXT: srawi r4, r4, 6 -; P8BE-NEXT: mulli r8, r9, 95 -; P8BE-NEXT: mtvsrwz v5, r10 -; P8BE-NEXT: add r4, r4, r11 -; P8BE-NEXT: mulli r9, r10, 95 -; P8BE-NEXT: vperm v3, v4, v3, v2 -; P8BE-NEXT: mulli r10, r4, 95 -; P8BE-NEXT: sub r3, r3, r12 -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: mtvsrwz v4, r3 -; P8BE-NEXT: mtvsrwz v0, r6 -; P8BE-NEXT: sub r3, r7, r9 -; P8BE-NEXT: sub r5, r5, r10 -; P8BE-NEXT: mtvsrwz v1, r3 -; P8BE-NEXT: mtvsrwz v6, r5 -; P8BE-NEXT: vperm v4, v0, v4, v2 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v1, v6, v1, v2 -; P8BE-NEXT: vperm v2, v0, v5, v2 -; P8BE-NEXT: xxmrghw v4, v1, v4 -; P8BE-NEXT: xxmrghw v2, v2, v3 -; P8BE-NEXT: vadduhm v2, v4, v2 +; P8BE-NEXT: add 
r10, r0, r10 +; P8BE-NEXT: mulli r11, r8, 95 +; P8BE-NEXT: add r3, r30, r3 +; P8BE-NEXT: slwi r8, r8, 16 +; P8BE-NEXT: mulli r12, r9, 95 +; P8BE-NEXT: mulli r0, r10, 95 +; P8BE-NEXT: or r8, r8, r9 +; P8BE-NEXT: mulli r30, r3, 95 +; P8BE-NEXT: mtvsrwz v2, r8 +; P8BE-NEXT: sub r5, r5, r11 +; P8BE-NEXT: sub r6, r6, r12 +; P8BE-NEXT: sub r7, r7, r0 +; P8BE-NEXT: slwi r5, r5, 16 +; P8BE-NEXT: sub r4, r4, r30 +; P8BE-NEXT: slwi r7, r7, 16 +; P8BE-NEXT: or r5, r5, r6 +; P8BE-NEXT: slwi r6, r10, 16 +; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: or r4, r7, r4 +; P8BE-NEXT: or r3, r6, r3 +; P8BE-NEXT: mtvsrwz v3, r5 +; P8BE-NEXT: mtvsrwz v4, r4 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: vmrgow v3, v4, v3 +; P8BE-NEXT: vmrgow v2, v5, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -724,39 +708,39 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 5 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 5 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -21386 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: srawi r5, r5, 5 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 5 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 ; 
P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 3 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 3 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: srawi r5, r4, 3 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 3 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: or r3, r3, r4 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_power_of_two: @@ -768,24 +752,21 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 5 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 6 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 6 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: addze r5, r5 +; P9BE-NEXT: slwi r5, r5, 6 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -793,17 +774,17 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx 
r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 3 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 3 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs0, vs1 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r5, r4 +; P9BE-NEXT: srawi r5, r5, 3 +; P9BE-NEXT: addze r5, r5 +; P9BE-NEXT: slwi r5, r5, 3 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_power_of_two: @@ -813,39 +794,39 @@ ; P8LE-NEXT: ori r3, r3, 37253 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: clrldi r6, r4, 48 -; P8LE-NEXT: extsh r5, r5 -; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: mulhw r3, r5, r3 -; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: srawi r8, r6, 6 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: extsh r6, r5 ; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: addze r8, r8 +; P8LE-NEXT: mulhw r3, r6, r3 +; P8LE-NEXT: srawi r8, r7, 6 +; P8LE-NEXT: rldicl r9, r4, 48, 48 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: srawi r9, r7, 5 ; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: slwi r8, r8, 6 -; P8LE-NEXT: add r3, r3, r5 -; P8LE-NEXT: addze r9, r9 -; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: add r3, r3, r6 +; P8LE-NEXT: addze r6, r8 +; P8LE-NEXT: extsh r8, r9 ; P8LE-NEXT: srwi r10, r3, 31 -; P8LE-NEXT: srawi r3, r3, 6 -; P8LE-NEXT: slwi r8, r9, 5 -; P8LE-NEXT: mtvsrd v2, r6 +; P8LE-NEXT: srwi r3, r3, 6 +; P8LE-NEXT: slwi r6, r6, 6 ; P8LE-NEXT: add r3, r3, r10 -; P8LE-NEXT: srawi r9, r4, 3 -; P8LE-NEXT: sub r6, r7, r8 +; P8LE-NEXT: srawi r8, r8, 5 +; P8LE-NEXT: sub r6, r7, r6 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: addze r7, r9 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: addze r8, r8 +; P8LE-NEXT: slwi r8, r8, 5 +; P8LE-NEXT: srawi r10, r4, 3 +; P8LE-NEXT: sub r8, r9, r8 +; P8LE-NEXT: 
addze r9, r10 +; P8LE-NEXT: slwi r7, r9, 3 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: slwi r5, r7, 3 -; P8LE-NEXT: sub r4, r4, r5 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v3, v4, v5 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: slwi r5, r8, 16 +; P8LE-NEXT: sub r4, r4, r7 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: or r3, r3, r4 +; P8LE-NEXT: mtfprwz f0, r5 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs1, vs0 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_power_of_two: @@ -858,38 +839,35 @@ ; P8BE-NEXT: extsh r5, r5 ; P8BE-NEXT: extsh r6, r6 ; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: rldicl r7, r4, 16, 48 -; P8BE-NEXT: srawi r8, r6, 5 -; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: addze r8, r8 +; P8BE-NEXT: srawi r7, r6, 5 +; P8BE-NEXT: rldicl r8, r4, 16, 48 +; P8BE-NEXT: addze r7, r7 +; P8BE-NEXT: extsh r9, r8 ; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: srawi r9, r7, 6 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: slwi r8, r8, 5 +; P8BE-NEXT: slwi r7, r7, 5 +; P8BE-NEXT: srawi r9, r9, 6 +; P8BE-NEXT: sub r6, r6, r7 ; P8BE-NEXT: add r3, r3, r5 ; P8BE-NEXT: addze r9, r9 -; P8BE-NEXT: sub r6, r6, r8 ; P8BE-NEXT: srwi r10, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 -; P8BE-NEXT: slwi r8, r9, 6 -; P8BE-NEXT: mtvsrwz v2, r6 +; P8BE-NEXT: slwi r9, r9, 6 ; P8BE-NEXT: add r3, r3, r10 -; P8BE-NEXT: srawi r9, r4, 3 -; P8BE-NEXT: addis r10, r2, .LCPI3_0@toc@ha -; P8BE-NEXT: sub r6, r7, r8 +; P8BE-NEXT: extsh r10, r4 +; P8BE-NEXT: sub r8, r8, r9 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: addze r8, r9 -; P8BE-NEXT: addi r7, r10, .LCPI3_0@toc@l -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: lxvw4x v3, 0, r7 +; P8BE-NEXT: srawi r10, r10, 3 +; P8BE-NEXT: slwi r7, r8, 16 +; P8BE-NEXT: addze r10, r10 +; P8BE-NEXT: or r6, r7, r6 +; P8BE-NEXT: slwi r9, r10, 3 +; P8BE-NEXT: mtvsrwz v2, r6 +; P8BE-NEXT: sub r4, r4, r9 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: slwi r5, r8, 3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: sub 
r4, r4, r5 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v2, v3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r3, r4, r3 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v2, v3 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -900,25 +878,25 @@ ; P9LE-LABEL: dont_fold_srem_one: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: lis r4, -14230 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: lis r6, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 30865 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: ori r6, r6, 47143 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 9 +; P9LE-NEXT: srwi r4, r4, 9 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: sub r3, r3, r4 ; P9LE-NEXT: lis r4, -19946 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: slwi r3, r3, 16 ; P9LE-NEXT: ori r4, r4, 17097 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v3, v4 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -927,165 +905,149 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: 
mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_one: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: lis r4, -14230 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 4 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 23 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 +; P9BE-NEXT: srawi r4, r4, 9 ; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 +; P9BE-NEXT: lis r5, -19946 +; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: ori r5, r5, 17097 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -14230 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: ori r4, r4, 30865 -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) -; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: xxperm vs1, vs0, vs2 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: add r4, r5, r4 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 9 +; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: lis r5, 24749 +; P9BE-NEXT: mulli r4, r4, 23 
+; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs0, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs0, vs1 +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: or r3, r3, r4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r5, 24749 -; P8LE-NEXT: lis r6, -19946 +; P8LE-NEXT: lis r5, -19946 +; P8LE-NEXT: lis r6, 24749 ; P8LE-NEXT: lis r8, -14230 -; P8LE-NEXT: ori r5, r5, 47143 -; P8LE-NEXT: ori r6, r6, 17097 +; P8LE-NEXT: ori r5, r5, 17097 +; P8LE-NEXT: ori r6, r6, 47143 ; P8LE-NEXT: ori r8, r8, 30865 ; P8LE-NEXT: mffprd r3, f0 -; P8LE-NEXT: rldicl r4, r3, 16, 48 -; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: rldicl r4, r3, 32, 48 +; P8LE-NEXT: rldicl r7, r3, 16, 48 ; P8LE-NEXT: rldicl r3, r3, 48, 48 ; P8LE-NEXT: extsh r4, r4 -; P8LE-NEXT: extsh r7, r7 -; P8LE-NEXT: extsh r3, r3 +; P8LE-NEXT: extsh r9, r7 +; P8LE-NEXT: extsh r10, r3 ; P8LE-NEXT: mulhw r5, r4, r5 -; P8LE-NEXT: mulhw r6, r7, r6 -; P8LE-NEXT: mulhw r8, r3, r8 -; P8LE-NEXT: srwi r9, r5, 31 -; P8LE-NEXT: srawi r5, r5, 11 -; P8LE-NEXT: add r6, r6, r7 -; P8LE-NEXT: add r8, r8, r3 -; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: mulhw r6, r9, r6 +; P8LE-NEXT: mulhw r8, r10, r8 +; P8LE-NEXT: add r5, r5, r4 ; P8LE-NEXT: srwi r9, r6, 31 -; P8LE-NEXT: srawi r6, r6, 4 +; P8LE-NEXT: srwi r6, r6, 11 +; P8LE-NEXT: add r8, r8, r10 ; P8LE-NEXT: add r6, r6, r9 +; P8LE-NEXT: srwi r9, r5, 31 +; P8LE-NEXT: srawi r5, r5, 4 +; P8LE-NEXT: mulli r6, r6, 5423 +; P8LE-NEXT: add r5, r5, r9 ; P8LE-NEXT: srwi r9, r8, 31 -; P8LE-NEXT: srawi r8, r8, 9 
-; P8LE-NEXT: mulli r5, r5, 5423 +; P8LE-NEXT: srwi r8, r8, 9 ; P8LE-NEXT: add r8, r8, r9 -; P8LE-NEXT: mulli r6, r6, 23 -; P8LE-NEXT: li r9, 0 +; P8LE-NEXT: mulli r5, r5, 23 ; P8LE-NEXT: mulli r8, r8, 654 -; P8LE-NEXT: mtvsrd v2, r9 +; P8LE-NEXT: sub r6, r7, r6 ; P8LE-NEXT: sub r4, r4, r5 -; P8LE-NEXT: sub r5, r7, r6 -; P8LE-NEXT: mtvsrd v3, r4 +; P8LE-NEXT: slwi r5, r6, 16 ; P8LE-NEXT: sub r3, r3, r8 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v3, v4 -; P8LE-NEXT: vmrghh v2, v5, v2 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: or r4, r5, r4 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: mtfprwz f0, r4 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs0, vs1 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_one: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 24749 -; P8BE-NEXT: lis r7, -19946 -; P8BE-NEXT: lis r8, -14230 -; P8BE-NEXT: ori r3, r3, 47143 -; P8BE-NEXT: ori r7, r7, 17097 -; P8BE-NEXT: ori r8, r8, 30865 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: lis r3, -19946 +; P8BE-NEXT: lis r7, 24749 +; P8BE-NEXT: lis r9, -14230 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: ori r7, r7, 47143 +; P8BE-NEXT: ori r9, r9, 30865 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r8, r4, 48 +; P8BE-NEXT: extsh r6, r5 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsh r8, r8 +; P8BE-NEXT: mulhw r3, r6, r3 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: mulhw r3, r5, r3 -; P8BE-NEXT: mulhw r7, r6, r7 -; P8BE-NEXT: mulhw r8, r4, r8 -; P8BE-NEXT: srawi r9, r3, 11 -; P8BE-NEXT: srwi r3, r3, 31 -; P8BE-NEXT: add r7, r7, r6 -; P8BE-NEXT: add r8, r8, r4 -; P8BE-NEXT: add r3, r9, r3 -; P8BE-NEXT: srwi r9, r7, 31 -; P8BE-NEXT: srawi r7, r7, 4 -; P8BE-NEXT: srawi r10, r8, 9 -; P8BE-NEXT: srwi r8, r8, 31 -; P8BE-NEXT: add r7, r7, r9 -; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: add r8, 
r10, r8 -; P8BE-NEXT: li r10, 0 -; P8BE-NEXT: mulli r7, r7, 23 -; P8BE-NEXT: mulli r8, r8, 654 -; P8BE-NEXT: mtvsrwz v2, r10 +; P8BE-NEXT: mulhw r7, r8, r7 +; P8BE-NEXT: mulhw r9, r4, r9 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r3, 31 +; P8BE-NEXT: srwi r3, r3, 4 +; P8BE-NEXT: add r9, r9, r4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r7, 31 +; P8BE-NEXT: srawi r7, r7, 11 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: add r6, r7, r6 +; P8BE-NEXT: srwi r7, r9, 31 +; P8BE-NEXT: srawi r9, r9, 9 +; P8BE-NEXT: mulli r6, r6, 5423 +; P8BE-NEXT: add r7, r9, r7 +; P8BE-NEXT: mulli r7, r7, 654 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r6, r7 -; P8BE-NEXT: mtvsrwz v4, r3 -; P8BE-NEXT: sub r3, r4, r8 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: xxmrghw v2, v2, v4 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r5, r8, r6 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r4, r4, r7 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1097,8 +1059,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, -19946 +; P9LE-NEXT: lis r6, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: ori r6, r6, 47143 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -1107,161 +1071,143 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: 
mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r5, r4 +; P9LE-NEXT: mulhw r5, r5, r6 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 15 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 15 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v3, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs0, vs1 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_i16_smax: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: lis r5, -19946 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: ori r5, r5, 17097 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: add r4, r5, r4 ; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 4 +; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, 24749 ; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; P9BE-NEXT: addi 
r3, r3, .LCPI5_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: or r3, r3, r4 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: xxperm vs1, vs0, vs2 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 15 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs0, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs0, vs1 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_i16_smax: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, 24749 -; P8LE-NEXT: lis r5, -19946 -; P8LE-NEXT: ori r4, r4, 47143 -; P8LE-NEXT: ori r5, r5, 17097 +; P8LE-NEXT: lis r5, 24749 +; P8LE-NEXT: lis r4, -19946 +; P8LE-NEXT: ori r5, r5, 47143 +; P8LE-NEXT: ori r4, r4, 17097 ; P8LE-NEXT: mffprd r3, f0 -; P8LE-NEXT: rldicl r6, r3, 16, 48 -; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: rldicl r7, r3, 16, 48 +; P8LE-NEXT: rldicl r6, r3, 32, 48 +; P8LE-NEXT: extsh r8, r7 ; P8LE-NEXT: extsh r6, r6 -; P8LE-NEXT: extsh r7, r7 +; P8LE-NEXT: mulhw r5, r8, r5 ; P8LE-NEXT: mulhw r4, r6, r4 -; P8LE-NEXT: mulhw r5, r7, r5 ; P8LE-NEXT: rldicl r3, r3, 48, 48 -; P8LE-NEXT: extsh r3, r3 -; P8LE-NEXT: srwi r8, r4, 31 -; P8LE-NEXT: srawi r4, r4, 11 -; P8LE-NEXT: add r5, r5, r7 -; P8LE-NEXT: add r4, r4, r8 ; P8LE-NEXT: srwi r8, r5, 31 -; P8LE-NEXT: srawi r5, r5, 4 -; P8LE-NEXT: mulli r4, r4, 5423 +; P8LE-NEXT: srwi r5, r5, 11 +; P8LE-NEXT: add r4, r4, r6 ; P8LE-NEXT: add r5, r5, r8 -; P8LE-NEXT: srawi r9, r3, 15 -; P8LE-NEXT: li r8, 0 -; P8LE-NEXT: mulli r5, r5, 23 -; P8LE-NEXT: 
mtvsrd v2, r8 -; P8LE-NEXT: sub r4, r6, r4 -; P8LE-NEXT: addze r6, r9 -; P8LE-NEXT: slwi r6, r6, 15 -; P8LE-NEXT: mtvsrd v3, r4 +; P8LE-NEXT: srwi r8, r4, 31 +; P8LE-NEXT: srawi r4, r4, 4 +; P8LE-NEXT: mulli r5, r5, 5423 +; P8LE-NEXT: add r4, r4, r8 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: mulli r4, r4, 23 +; P8LE-NEXT: srawi r8, r8, 15 ; P8LE-NEXT: sub r5, r7, r5 -; P8LE-NEXT: sub r3, r3, r6 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v3, v4 -; P8LE-NEXT: vmrghh v2, v5, v2 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: addze r7, r8 +; P8LE-NEXT: slwi r7, r7, 15 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r6, r4 +; P8LE-NEXT: sub r3, r3, r7 +; P8LE-NEXT: or r4, r5, r4 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: mtfprwz f0, r4 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs0, vs1 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_i16_smax: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r3, v2 -; P8BE-NEXT: lis r4, 24749 -; P8BE-NEXT: lis r5, -19946 -; P8BE-NEXT: li r9, 0 -; P8BE-NEXT: ori r4, r4, 47143 -; P8BE-NEXT: ori r5, r5, 17097 -; P8BE-NEXT: mtvsrwz v2, r9 -; P8BE-NEXT: clrldi r6, r3, 48 -; P8BE-NEXT: rldicl r7, r3, 48, 48 -; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: mulhw r4, r6, r4 -; P8BE-NEXT: mulhw r5, r7, r5 -; P8BE-NEXT: rldicl r3, r3, 32, 48 -; P8BE-NEXT: extsh r3, r3 -; P8BE-NEXT: srwi r8, r4, 31 -; P8BE-NEXT: srawi r4, r4, 11 -; P8BE-NEXT: add r5, r5, r7 -; P8BE-NEXT: add r4, r4, r8 -; P8BE-NEXT: srwi r8, r5, 31 -; P8BE-NEXT: srawi r5, r5, 4 -; P8BE-NEXT: mulli r4, r4, 5423 -; P8BE-NEXT: add r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha -; P8BE-NEXT: srawi r10, r3, 15 -; P8BE-NEXT: mulli r5, r5, 23 -; P8BE-NEXT: sub r4, r6, r4 -; P8BE-NEXT: addi r6, r8, .LCPI5_0@toc@l -; P8BE-NEXT: addze r8, r10 -; P8BE-NEXT: lxvw4x v3, 0, r6 -; P8BE-NEXT: slwi r6, r8, 15 -; P8BE-NEXT: mtvsrwz v4, r4 -; P8BE-NEXT: sub r5, r7, r5 -; P8BE-NEXT: sub r3, r3, r6 -; P8BE-NEXT: mtvsrwz v5, r5 -; 
P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: xxmrghw v2, v2, v4 +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -19946 +; P8BE-NEXT: lis r7, 24749 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: ori r7, r7, 47143 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r8, r4, 48 +; P8BE-NEXT: extsh r6, r5 +; P8BE-NEXT: extsh r8, r8 +; P8BE-NEXT: mulhw r3, r6, r3 +; P8BE-NEXT: mulhw r7, r8, r7 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r3, 31 +; P8BE-NEXT: srwi r3, r3, 4 +; P8BE-NEXT: add r3, r3, r6 +; P8BE-NEXT: srwi r6, r7, 31 +; P8BE-NEXT: srawi r7, r7, 11 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: add r6, r7, r6 +; P8BE-NEXT: mulli r6, r6, 5423 +; P8BE-NEXT: sub r3, r5, r3 +; P8BE-NEXT: srawi r5, r4, 15 +; P8BE-NEXT: addze r5, r5 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: sub r6, r8, r6 +; P8BE-NEXT: slwi r5, r5, 15 +; P8BE-NEXT: or r3, r3, r6 +; P8BE-NEXT: sub r4, r4, r5 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -13,173 +13,167 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: lis r4, 689 +; P9LE-NEXT: lis r6, 528 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 55879 +; P9LE-NEXT: ori r6, r6, 33826 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 528 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: ori r4, r4, 33826 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: mulli r4, r4, 124 -; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: li 
r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r5, r4, 16 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: lis r6, 65 +; P9LE-NEXT: ori r6, r6, 22281 +; P9LE-NEXT: mulli r5, r5, 124 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 ; P9LE-NEXT: lis r4, 668 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: ori r4, r4, 48149 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 65 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 22281 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: mulli r4, r4, 1003 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r5, r4, 16 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: lis r4, 65 +; P9BE-NEXT: lis r6, 668 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 22281 +; P9BE-NEXT: ori r6, r6, 48149 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: mulli r4, r4, 1003 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 668 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 48149 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: 
mulhwu r5, r5, r6 +; P9BE-NEXT: lis r6, 689 +; P9BE-NEXT: ori r6, r6, 55879 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, 528 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; P9BE-NEXT: ori r4, r4, 33826 -; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: ori r4, r4, 33826 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: mulli r4, r4, 124 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 689 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: ori r4, r4, 55879 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: mulhwu r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_1: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 689 -; P8LE-NEXT: lis r8, 528 +; P8LE-NEXT: lis r3, 528 +; P8LE-NEXT: lis r6, 65 +; P8LE-NEXT: ori r3, r3, 33826 +; P8LE-NEXT: ori r6, r6, 22281 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r5, 16 +; P8LE-NEXT: clrlwi r9, r7, 16 +; P8LE-NEXT: mulhwu r3, r8, r3 +; P8LE-NEXT: lis r8, 689 +; P8LE-NEXT: clrldi r10, r4, 48 +; P8LE-NEXT: mulhwu r6, r9, r6 ; P8LE-NEXT: lis r9, 668 -; P8LE-NEXT: lis r10, 65 -; P8LE-NEXT: ori r3, r3, 55879 -; P8LE-NEXT: ori r8, r8, 33826 
+; P8LE-NEXT: ori r8, r8, 55879 +; P8LE-NEXT: rldicl r4, r4, 32, 48 +; P8LE-NEXT: clrlwi r10, r10, 16 ; P8LE-NEXT: ori r9, r9, 48149 -; P8LE-NEXT: ori r10, r10, 22281 -; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r6, r6, 16 -; P8LE-NEXT: mulhwu r3, r5, r3 -; P8LE-NEXT: clrlwi r7, r7, 16 ; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: mulhwu r8, r6, r8 -; P8LE-NEXT: mulhwu r9, r7, r9 -; P8LE-NEXT: mulhwu r10, r4, r10 -; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: mulli r8, r8, 124 +; P8LE-NEXT: mulhwu r8, r10, r8 +; P8LE-NEXT: mulhwu r9, r4, r9 +; P8LE-NEXT: mulli r3, r3, 124 +; P8LE-NEXT: mulli r6, r6, 1003 +; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: mulli r9, r9, 98 -; P8LE-NEXT: mulli r10, r10, 1003 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: sub r5, r6, r8 -; P8LE-NEXT: mtvsrd v2, r3 -; P8LE-NEXT: sub r3, r7, r9 -; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: mtvsrd v4, r3 -; P8LE-NEXT: mtvsrd v5, r4 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: sub r6, r7, r6 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: slwi r6, r6, 16 +; P8LE-NEXT: sub r5, r10, r8 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: or r3, r3, r5 +; P8LE-NEXT: or r4, r6, r4 +; P8LE-NEXT: mtfprwz f0, r3 +; P8LE-NEXT: mtfprwz f1, r4 +; P8LE-NEXT: xxmrghw v2, vs1, vs0 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_urem_vec_1: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 65 -; P8BE-NEXT: lis r8, 668 +; P8BE-NEXT: lis r3, 668 +; P8BE-NEXT: lis r6, 689 +; P8BE-NEXT: ori r3, r3, 48149 +; P8BE-NEXT: ori r6, r6, 55879 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: clrlwi r8, r5, 16 +; P8BE-NEXT: clrlwi r9, r7, 16 +; P8BE-NEXT: mulhwu r3, r8, r3 +; P8BE-NEXT: lis r8, 65 +; P8BE-NEXT: clrldi r10, r4, 48 +; 
P8BE-NEXT: mulhwu r6, r9, r6 ; P8BE-NEXT: lis r9, 528 -; P8BE-NEXT: lis r10, 689 -; P8BE-NEXT: ori r3, r3, 22281 -; P8BE-NEXT: ori r8, r8, 48149 +; P8BE-NEXT: ori r8, r8, 22281 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: clrlwi r10, r10, 16 ; P8BE-NEXT: ori r9, r9, 33826 -; P8BE-NEXT: ori r10, r10, 55879 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: rldicl r4, r4, 16, 48 -; P8BE-NEXT: mulhwu r3, r5, r3 -; P8BE-NEXT: clrlwi r7, r7, 16 ; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulhwu r8, r6, r8 -; P8BE-NEXT: mulhwu r9, r7, r9 -; P8BE-NEXT: mulhwu r10, r4, r10 -; P8BE-NEXT: mulli r3, r3, 1003 -; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: mulhwu r8, r10, r8 +; P8BE-NEXT: mulhwu r9, r4, r9 +; P8BE-NEXT: mulli r3, r3, 98 +; P8BE-NEXT: mulli r6, r6, 95 +; P8BE-NEXT: mulli r8, r8, 1003 ; P8BE-NEXT: mulli r9, r9, 124 -; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: sub r6, r7, r6 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r5, r10, r8 +; P8BE-NEXT: sub r4, r4, r9 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: or r4, r6, r4 ; P8BE-NEXT: mtvsrwz v2, r3 -; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: lxvw4x v3, 0, r3 -; P8BE-NEXT: sub r3, r7, r9 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: mtvsrwz v0, r4 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v3, v2 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -196,31 +190,31 @@ ; P9LE-NEXT: mulhwu r5, r3, r4 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 
-; P9LE-NEXT: mulhwu r5, r3, r4 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r6, r5, 16 +; P9LE-NEXT: mulhwu r6, r6, r4 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; P9LE-NEXT: slwi r5, r5, 16 +; P9LE-NEXT: or r3, r5, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r6, r5, 16 +; P9LE-NEXT: mulhwu r4, r6, r4 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_2: @@ -233,34 +227,31 @@ ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r4 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r6, r5, 16 +; P9BE-NEXT: mulhwu r6, r6, r4 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: slwi r5, r5, 16 +; P9BE-NEXT: or r3, r5, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, 
r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r6, r5, 16 +; P9BE-NEXT: mulhwu r4, r6, r4 ; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_2: @@ -273,29 +264,29 @@ ; P8LE-NEXT: rldicl r6, r4, 48, 48 ; P8LE-NEXT: rldicl r7, r4, 32, 48 ; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r6, 16 ; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r6, r6, 16 -; P8LE-NEXT: mulhwu r8, r5, r3 +; P8LE-NEXT: clrlwi r9, r4, 16 +; P8LE-NEXT: mulhwu r8, r8, r3 ; P8LE-NEXT: clrlwi r7, r7, 16 -; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: mulhwu r9, r6, r3 -; P8LE-NEXT: mulhwu r10, r7, r3 -; P8LE-NEXT: mulhwu r3, r4, r3 +; P8LE-NEXT: mulhwu r9, r9, r3 +; P8LE-NEXT: mulhwu r10, r5, r3 +; P8LE-NEXT: mulhwu r3, r7, r3 ; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r10, r10, 95 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: sub r5, r5, r8 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v2, r5 -; P8LE-NEXT: sub r5, r7, r10 -; P8LE-NEXT: sub r3, r4, r3 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: sub r6, r6, r8 +; P8LE-NEXT: sub r4, r4, r9 +; P8LE-NEXT: sub r5, r5, r10 +; P8LE-NEXT: slwi r6, r6, 16 +; P8LE-NEXT: sub r3, r7, r3 +; P8LE-NEXT: slwi r4, r4, 16 +; 
P8LE-NEXT: or r5, r6, r5 +; P8LE-NEXT: or r3, r4, r3 +; P8LE-NEXT: mtfprwz f0, r5 +; P8LE-NEXT: mtfprwz f1, r3 +; P8LE-NEXT: xxmrghw v2, vs1, vs0 ; P8LE-NEXT: blr ; ; P8BE-LABEL: fold_urem_vec_2: @@ -305,34 +296,31 @@ ; P8BE-NEXT: ori r3, r3, 55879 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: clrlwi r6, r6, 16 ; P8BE-NEXT: rldicl r4, r4, 16, 48 -; P8BE-NEXT: mulhwu r8, r5, r3 +; P8BE-NEXT: clrlwi r8, r6, 16 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrlwi r9, r4, 16 +; P8BE-NEXT: mulhwu r8, r8, r3 ; P8BE-NEXT: clrlwi r7, r7, 16 -; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulhwu r9, r6, r3 -; P8BE-NEXT: mulhwu r10, r7, r3 -; P8BE-NEXT: mulhwu r3, r4, r3 +; P8BE-NEXT: mulhwu r9, r9, r3 +; P8BE-NEXT: mulhwu r10, r5, r3 +; P8BE-NEXT: mulhwu r3, r7, r3 ; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: mulli r9, r9, 95 ; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sub r5, r5, r8 -; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: sub r4, r4, r9 +; P8BE-NEXT: sub r5, r5, r10 +; P8BE-NEXT: slwi r6, r6, 16 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: slwi r4, r4, 16 +; P8BE-NEXT: or r5, r6, r5 +; P8BE-NEXT: or r3, r4, r3 ; P8BE-NEXT: mtvsrwz v2, r5 -; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l -; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r7, r10 -; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: mtvsrwz v4, r6 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v2, v4, v2, v3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v3, v2 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -343,52 +331,52 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-LABEL: combine_urem_udiv: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, 689 -; P9LE-NEXT: vextuhrx r3, r3, v2 
-; P9LE-NEXT: ori r4, r4, 55879 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r5, r3, r4 -; P9LE-NEXT: mulli r6, r5, 95 -; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, 689 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r6, r3, 16 -; P9LE-NEXT: mulhwu r6, r6, r4 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r7, r3, 16 -; P9LE-NEXT: mulhwu r7, r7, r4 +; P9LE-NEXT: ori r5, r5, 55879 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: sub r3, r3, r6 +; P9LE-NEXT: li r6, 0 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: clrlwi r6, r6, 16 +; P9LE-NEXT: mulhwu r7, r6, r5 ; P9LE-NEXT: mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: or r3, r3, r6 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r8, r3, 16 -; P9LE-NEXT: mulhwu r4, r8, r4 -; P9LE-NEXT: mulli r8, r4, 95 -; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: clrlwi r6, r3, 16 +; P9LE-NEXT: mulhwu r6, r6, r5 +; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: xxmrglw v2, v2, v3 -; P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: xxmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 4 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: clrlwi r9, r8, 16 +; P9LE-NEXT: mulhwu r5, r9, r5 +; P9LE-NEXT: mulli r9, r5, 95 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: or r3, r3, r8 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: slwi r3, r4, 16 +; P9LE-NEXT: rlwimi r3, r7, 0, 22, 31 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 +; 
P9LE-NEXT: mtfprwz f0, r3 +; P9LE-NEXT: slwi r3, r6, 16 +; P9LE-NEXT: rlwimi r3, r5, 0, 22, 31 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v3, vs1, vs0 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; ; P9BE-LABEL: combine_urem_udiv: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: lis r5, 689 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r5, r5, 55879 @@ -396,41 +384,38 @@ ; P9BE-NEXT: mulhwu r4, r4, r5 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r6, r3, 16 -; P9BE-NEXT: mulhwu r6, r6, r5 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r7, r3, 16 -; P9BE-NEXT: xxperm vs0, vs1, vs2 +; P9BE-NEXT: li r6, 6 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; P9BE-NEXT: clrlwi r7, r6, 16 ; P9BE-NEXT: mulhwu r7, r7, r5 ; P9BE-NEXT: mulli r8, r7, 95 -; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtfprwz f1, r3 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: or r3, r3, r6 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r5 -; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: clrlwi r6, r3, 16 +; P9BE-NEXT: mulhwu r6, r6, r5 +; P9BE-NEXT: mulli r8, r6, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: mtfprwz f3, r5 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 -; P9BE-NEXT: mtfprwz f0, r4 -; P9BE-NEXT: mtfprwz f1, r6 -; P9BE-NEXT: xxperm vs0, vs1, vs2 -; P9BE-NEXT: mtfprwz f1, r7 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v3, vs1, vs0 +; P9BE-NEXT: li r8, 2 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: slwi r3, r3, 16 +; 
P9BE-NEXT: clrlwi r9, r8, 16 +; P9BE-NEXT: mulhwu r5, r9, r5 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: or r3, r3, r8 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: slwi r3, r4, 16 +; P9BE-NEXT: rlwimi r3, r7, 0, 22, 31 +; P9BE-NEXT: vmrgow v2, v2, v3 +; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: slwi r3, r6, 16 +; P9BE-NEXT: rlwimi r3, r5, 0, 22, 31 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vmrgow v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -438,88 +423,89 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 689 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r3, r3, 55879 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r6, r4, 48, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r8, r6, 16 -; P8LE-NEXT: rldicl r7, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 16, 48 -; P8LE-NEXT: mulhwu r9, r5, r3 +; P8LE-NEXT: rldicl r5, r4, 48, 48 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r5, 16 +; P8LE-NEXT: clrlwi r6, r6, 16 +; P8LE-NEXT: rldicl r4, r4, 32, 48 +; P8LE-NEXT: clrlwi r9, r7, 16 ; P8LE-NEXT: mulhwu r8, r8, r3 -; P8LE-NEXT: clrlwi r10, r7, 16 -; P8LE-NEXT: clrlwi r11, r4, 16 -; P8LE-NEXT: mulhwu r10, r10, r3 -; P8LE-NEXT: mulhwu r3, r11, r3 -; P8LE-NEXT: mulli r11, r9, 95 -; P8LE-NEXT: mtvsrd v2, r9 -; P8LE-NEXT: mulli r9, r8, 95 -; P8LE-NEXT: mtvsrd v3, r8 -; P8LE-NEXT: mulli r8, r10, 95 -; P8LE-NEXT: mtvsrd v4, r10 -; P8LE-NEXT: mulli r10, r3, 95 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: sub r5, r5, r11 -; P8LE-NEXT: sub r6, r6, r9 -; P8LE-NEXT: mtvsrd v3, r5 -; P8LE-NEXT: sub r5, r7, r8 -; P8LE-NEXT: mtvsrd v5, r6 -; P8LE-NEXT: sub r4, r4, r10 -; P8LE-NEXT: mtvsrd v0, r5 -; P8LE-NEXT: mtvsrd v1, r4 -; P8LE-NEXT: vmrghh v3, v5, v3 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v0, v1, v0 -; P8LE-NEXT: vmrghh v4, v5, v4 -; P8LE-NEXT: xxmrglw v3, v0, v3 -; P8LE-NEXT: xxmrglw v2, v4, v2 -; 
P8LE-NEXT: vadduhm v2, v3, v2 +; P8LE-NEXT: clrlwi r10, r4, 16 +; P8LE-NEXT: mulhwu r11, r6, r3 +; P8LE-NEXT: mulhwu r9, r9, r3 +; P8LE-NEXT: mulhwu r3, r10, r3 +; P8LE-NEXT: mulli r10, r8, 95 +; P8LE-NEXT: slwi r8, r8, 16 +; P8LE-NEXT: mulli r12, r11, 95 +; P8LE-NEXT: mulli r0, r9, 95 +; P8LE-NEXT: mulli r30, r3, 95 +; P8LE-NEXT: rlwimi r8, r11, 0, 22, 31 +; P8LE-NEXT: mtfprwz f0, r8 +; P8LE-NEXT: sub r5, r5, r10 +; P8LE-NEXT: sub r6, r6, r12 +; P8LE-NEXT: sub r7, r7, r0 +; P8LE-NEXT: slwi r5, r5, 16 +; P8LE-NEXT: sub r4, r4, r30 +; P8LE-NEXT: slwi r7, r7, 16 +; P8LE-NEXT: or r5, r5, r6 +; P8LE-NEXT: slwi r6, r9, 16 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: or r4, r7, r4 +; P8LE-NEXT: rlwimi r6, r3, 0, 22, 31 +; P8LE-NEXT: mtfprwz f1, r5 +; P8LE-NEXT: mtfprwz f2, r4 +; P8LE-NEXT: mtfprwz f3, r6 +; P8LE-NEXT: xxmrghw v2, vs2, vs1 +; P8LE-NEXT: xxmrghw v3, vs3, vs0 +; P8LE-NEXT: vadduhm v2, v2, v3 ; P8LE-NEXT: blr ; ; P8BE-LABEL: combine_urem_udiv: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 689 -; P8BE-NEXT: addis r11, r2, .LCPI2_0@toc@ha +; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8BE-NEXT: ori r3, r3, 55879 -; P8BE-NEXT: addi r11, r11, .LCPI2_0@toc@l -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: lxvw4x v2, 0, r11 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r6, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: clrlwi r8, r5, 16 ; P8BE-NEXT: clrlwi r9, r6, 16 -; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: clrlwi r10, r7, 16 ; P8BE-NEXT: mulhwu r8, r8, r3 +; P8BE-NEXT: clrlwi r11, r4, 16 ; P8BE-NEXT: mulhwu r9, r9, r3 -; P8BE-NEXT: clrlwi r10, r7, 16 -; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulhwu r10, r10, r3 -; P8BE-NEXT: mulhwu r3, r4, r3 -; P8BE-NEXT: mulli r12, r8, 95 -; P8BE-NEXT: mtvsrwz v3, r8 -; P8BE-NEXT: mulli r8, r9, 95 -; P8BE-NEXT: mtvsrwz v4, r9 -; P8BE-NEXT: 
mulli r9, r10, 95 -; P8BE-NEXT: mtvsrwz v5, r10 -; P8BE-NEXT: mulli r10, r3, 95 -; P8BE-NEXT: vperm v3, v4, v3, v2 -; P8BE-NEXT: sub r5, r5, r12 -; P8BE-NEXT: sub r6, r6, r8 -; P8BE-NEXT: mtvsrwz v4, r5 -; P8BE-NEXT: sub r5, r7, r9 -; P8BE-NEXT: mtvsrwz v0, r6 -; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrwz v1, r5 -; P8BE-NEXT: mtvsrwz v6, r4 -; P8BE-NEXT: vperm v4, v0, v4, v2 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v1, v6, v1, v2 -; P8BE-NEXT: vperm v2, v0, v5, v2 -; P8BE-NEXT: xxmrghw v4, v1, v4 -; P8BE-NEXT: xxmrghw v2, v2, v3 -; P8BE-NEXT: vadduhm v2, v4, v2 +; P8BE-NEXT: mulhwu r3, r11, r3 +; P8BE-NEXT: mulli r11, r8, 95 +; P8BE-NEXT: slwi r8, r8, 16 +; P8BE-NEXT: mulli r12, r9, 95 +; P8BE-NEXT: mulli r0, r10, 95 +; P8BE-NEXT: mulli r30, r3, 95 +; P8BE-NEXT: rlwimi r8, r9, 0, 22, 31 +; P8BE-NEXT: mtvsrwz v2, r8 +; P8BE-NEXT: sub r5, r5, r11 +; P8BE-NEXT: sub r6, r6, r12 +; P8BE-NEXT: sub r7, r7, r0 +; P8BE-NEXT: slwi r5, r5, 16 +; P8BE-NEXT: sub r4, r4, r30 +; P8BE-NEXT: slwi r7, r7, 16 +; P8BE-NEXT: or r5, r5, r6 +; P8BE-NEXT: slwi r6, r10, 16 +; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8BE-NEXT: or r4, r7, r4 +; P8BE-NEXT: rlwimi r6, r3, 0, 22, 31 +; P8BE-NEXT: mtvsrwz v3, r5 +; P8BE-NEXT: mtvsrwz v4, r4 +; P8BE-NEXT: mtvsrwz v5, r6 +; P8BE-NEXT: vmrgow v3, v4, v3 +; P8BE-NEXT: vmrgow v2, v5, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -531,61 +517,52 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-LABEL: dont_fold_urem_power_of_two: ; P9LE: # %bb.0: -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, 689 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 55879 -; P9LE-NEXT: clrlwi r3, r3, 26 -; P9LE-NEXT: mtvsrd v3, r3 +; P9LE-NEXT: li r4, 0 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: lis r5, 689 +; P9LE-NEXT: vextuhrx r4, r4, v2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 27 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: ori r5, r5, 55879 +; 
P9LE-NEXT: clrlwi r4, r4, 26 +; P9LE-NEXT: rlwimi r4, r3, 16, 11, 15 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: mtfprwz f0, r4 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 29 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: xxmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 29 +; P9LE-NEXT: rlwimi r4, r3, 16, 0, 15 +; P9LE-NEXT: mtfprwz f1, r4 +; P9LE-NEXT: xxmrghw v2, vs1, vs0 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_power_of_two: ; P9BE: # %bb.0: -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: lis r4, 689 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 55879 -; P9BE-NEXT: clrlwi r3, r3, 27 -; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: li r4, 2 ; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 26 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: clrlwi r4, r4, 27 +; P9BE-NEXT: rlwimi r4, r3, 16, 10, 15 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: mtvsrwz v3, r4 +; P9BE-NEXT: lis r4, 689 +; P9BE-NEXT: ori r4, r4, 55879 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 29 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs0, vs1 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: rlwinm r4, r4, 16, 13, 15 
+; P9BE-NEXT: or r3, r4, r3 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_power_of_two: @@ -595,51 +572,42 @@ ; P8LE-NEXT: ori r3, r3, 55879 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: clrldi r6, r4, 48 -; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r6, r6, 26 -; P8LE-NEXT: mulhwu r3, r5, r3 ; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: mtvsrd v2, r6 +; P8LE-NEXT: clrlwi r6, r5, 16 +; P8LE-NEXT: mulhwu r3, r6, r3 +; P8LE-NEXT: clrldi r6, r4, 48 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: clrlwi r6, r7, 27 +; P8LE-NEXT: clrlwi r6, r6, 26 ; P8LE-NEXT: clrlwi r4, r4, 29 -; P8LE-NEXT: mtvsrd v3, r6 -; P8LE-NEXT: mtvsrd v4, r4 +; P8LE-NEXT: rlwimi r6, r7, 16, 11, 15 +; P8LE-NEXT: mtfprwz f0, r6 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: rlwimi r4, r3, 16, 0, 15 +; P8LE-NEXT: mtfprwz f1, r4 +; P8LE-NEXT: xxmrghw v2, vs1, vs0 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_power_of_two: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 689 -; P8BE-NEXT: addis r7, r2, .LCPI3_0@toc@ha ; P8BE-NEXT: ori r3, r3, 55879 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 32, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: clrlwi r6, r6, 27 ; P8BE-NEXT: mulhwu r3, r5, r3 -; P8BE-NEXT: rldicl r8, r4, 16, 48 -; P8BE-NEXT: mtvsrwz v2, r6 -; P8BE-NEXT: addi r6, r7, .LCPI3_0@toc@l ; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: clrlwi r7, r8, 26 -; P8BE-NEXT: lxvw4x v3, 0, r6 -; P8BE-NEXT: clrlwi r4, r4, 29 -; P8BE-NEXT: mtvsrwz v4, r7 -; P8BE-NEXT: mtvsrwz v0, r4 +; P8BE-NEXT: rlwinm r4, r4, 16, 13, 15 +; P8BE-NEXT: rlwimi r6, r7, 16, 10, 15 +; P8BE-NEXT: mtvsrwz v2, r6 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: sub r3, r5, r3 -; 
P8BE-NEXT: mtvsrwz v5, r3 -; P8BE-NEXT: vperm v3, v0, v5, v3 -; P8BE-NEXT: xxmrghw v2, v2, v3 +; P8BE-NEXT: or r3, r4, r3 +; P8BE-NEXT: mtvsrwz v3, r3 +; P8BE-NEXT: vmrgow v2, v2, v3 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -651,147 +619,131 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, 2849 +; P9LE-NEXT: lis r6, 12 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 25645 +; P9LE-NEXT: ori r6, r6, 5560 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 12 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 5560 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 100 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r5, r4, 16 +; P9LE-NEXT: mulhwu r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, 100 +; P9LE-NEXT: slwi r4, r4, 16 +; P9LE-NEXT: ori r5, r5, 13629 +; P9LE-NEXT: or r3, r4, r3 +; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: ori r4, r4, 13629 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: xxmrglw v2, v3, v2 +; P9LE-NEXT: slwi r3, r3, 16 +; P9LE-NEXT: mtfprwz f1, r3 +; P9LE-NEXT: xxmrghw v2, vs0, vs1 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_one: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: lis r4, 12 +; P9BE-NEXT: lis r6, 2849 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 5560 +; P9BE-NEXT: ori r6, r6, 
25645 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 2849 -; P9BE-NEXT: mtfprwz f0, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: ori r4, r4, 25645 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: mulli r4, r4, 23 -; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r5, r4, 16 +; P9BE-NEXT: mulhwu r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 23 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: slwi r4, r4, 16 +; P9BE-NEXT: or r3, r4, r3 ; P9BE-NEXT: lis r4, 100 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: ori r4, r4, 13629 -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxv vs2, 0(r3) +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: ori r4, r4, 13629 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: xxperm vs0, vs1, vs2 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: mulli r4, r4, 654 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtfprwz f1, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtfprwz f3, r3 -; P9BE-NEXT: xxperm vs1, vs3, vs2 -; P9BE-NEXT: xxmrghw v2, vs1, vs0 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vmrgow v2, v2, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r3, 100 +; P8LE-NEXT: lis r3, 12 ; P8LE-NEXT: lis r7, 2849 -; P8LE-NEXT: lis r8, 12 -; P8LE-NEXT: li r9, 0 -; P8LE-NEXT: ori r3, r3, 13629 +; P8LE-NEXT: lis r9, 100 +; P8LE-NEXT: ori r3, r3, 5560 ; P8LE-NEXT: ori r7, r7, 25645 -; P8LE-NEXT: ori r8, r8, 5560 -; P8LE-NEXT: mtvsrd v2, r9 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: rldicl r5, r4, 48, 48 -; P8LE-NEXT: rldicl r6, r4, 32, 48 -; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: rldicl r5, r4, 32, 48 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: clrlwi r8, r6, 16 
; P8LE-NEXT: clrlwi r5, r5, 16 -; P8LE-NEXT: clrlwi r6, r6, 16 -; P8LE-NEXT: mulhwu r3, r5, r3 -; P8LE-NEXT: clrlwi r4, r4, 16 -; P8LE-NEXT: mulhwu r7, r6, r7 -; P8LE-NEXT: mulhwu r8, r4, r8 -; P8LE-NEXT: mulli r3, r3, 654 +; P8LE-NEXT: mulhwu r3, r8, r3 +; P8LE-NEXT: ori r8, r9, 13629 +; P8LE-NEXT: clrlwi r9, r4, 16 +; P8LE-NEXT: mulhwu r7, r5, r7 +; P8LE-NEXT: mulhwu r8, r9, r8 +; P8LE-NEXT: mulli r3, r3, 5423 ; P8LE-NEXT: mulli r7, r7, 23 -; P8LE-NEXT: mulli r8, r8, 5423 -; P8LE-NEXT: sub r3, r5, r3 -; P8LE-NEXT: sub r5, r6, r7 -; P8LE-NEXT: mtvsrd v3, r3 -; P8LE-NEXT: sub r3, r4, r8 -; P8LE-NEXT: mtvsrd v4, r5 -; P8LE-NEXT: mtvsrd v5, r3 -; P8LE-NEXT: vmrghh v2, v3, v2 -; P8LE-NEXT: vmrghh v3, v5, v4 -; P8LE-NEXT: xxmrglw v2, v3, v2 +; P8LE-NEXT: mulli r8, r8, 654 +; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: sub r5, r5, r7 +; P8LE-NEXT: slwi r3, r3, 16 +; P8LE-NEXT: sub r4, r4, r8 +; P8LE-NEXT: or r3, r3, r5 +; P8LE-NEXT: slwi r4, r4, 16 +; P8LE-NEXT: mtfprwz f0, r3 +; P8LE-NEXT: mtfprwz f1, r4 +; P8LE-NEXT: xxmrghw v2, vs0, vs1 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_one: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 12 -; P8BE-NEXT: lis r7, 2849 +; P8BE-NEXT: lis r3, 2849 +; P8BE-NEXT: lis r6, 12 +; P8BE-NEXT: ori r3, r3, 25645 +; P8BE-NEXT: ori r6, r6, 5560 +; P8BE-NEXT: rldicl r5, r4, 48, 48 +; P8BE-NEXT: clrldi r7, r4, 48 +; P8BE-NEXT: clrlwi r8, r5, 16 +; P8BE-NEXT: clrlwi r7, r7, 16 +; P8BE-NEXT: mulhwu r3, r8, r3 ; P8BE-NEXT: lis r8, 100 -; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha -; P8BE-NEXT: li r10, 0 -; P8BE-NEXT: ori r3, r3, 5560 -; P8BE-NEXT: ori r7, r7, 25645 -; P8BE-NEXT: ori r8, r8, 13629 -; P8BE-NEXT: mtvsrwz v2, r10 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: mulhwu r3, r5, r3 +; P8BE-NEXT: mulhwu r6, r7, r6 +; P8BE-NEXT: ori r8, r8, 13629 ; P8BE-NEXT: clrlwi r4, r4, 16 -; 
P8BE-NEXT: mulhwu r7, r6, r7 ; P8BE-NEXT: mulhwu r8, r4, r8 -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: mulli r7, r7, 23 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: mulli r6, r6, 5423 ; P8BE-NEXT: mulli r8, r8, 654 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l -; P8BE-NEXT: lxvw4x v3, 0, r5 -; P8BE-NEXT: sub r5, r6, r7 -; P8BE-NEXT: mtvsrwz v4, r3 -; P8BE-NEXT: sub r3, r4, r8 -; P8BE-NEXT: mtvsrwz v5, r5 -; P8BE-NEXT: mtvsrwz v0, r3 -; P8BE-NEXT: vperm v4, v5, v4, v3 -; P8BE-NEXT: vperm v2, v2, v0, v3 -; P8BE-NEXT: xxmrghw v2, v2, v4 +; P8BE-NEXT: sub r5, r7, r6 +; P8BE-NEXT: slwi r3, r3, 16 +; P8BE-NEXT: or r3, r3, r5 +; P8BE-NEXT: sub r4, r4, r8 +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: mtvsrwz v3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -28,80 +28,51 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LE-P8-LABEL: test_v16i8_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: lbz r3, 0(r3) ; CHECK-LE-P8-NEXT: lbz r4, 0(r4) -; CHECK-LE-P8-NEXT: mtvsrd v2, r3 -; CHECK-LE-P8-NEXT: mtvsrd v3, r4 -; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: lbz r3, 0(r3) +; CHECK-LE-P8-NEXT: slwi r4, r4, 16 +; CHECK-LE-P8-NEXT: or r3, r4, r3 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v16i8_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 -; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 -; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: lbz r4, 0(r4) +; CHECK-LE-P9-NEXT: lbz r3, 0(r3) +; CHECK-LE-P9-NEXT: slwi r4, r4, 16 +; CHECK-LE-P9-NEXT: 
or r3, r4, r3 +; CHECK-LE-P9-NEXT: mtvsrws v2, r3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v16i8_v16i8: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-BE-P8-NEXT: lbz r4, 0(r4) -; CHECK-BE-P8-NEXT: lbz r3, 0(r3) -; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l -; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 -; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 -; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 -; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: xxleqv v2, v2, v2 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v16i8_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r4 -; CHECK-BE-P9-NEXT: lxsibzx f1, 0, r3 -; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI0_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs0, 0(r5) -; CHECK-BE-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-P9-NEXT: xxleqv v2, v2, v2 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0 -; CHECK-AIX-64-P8-NEXT: lbz r4, 0(r4) -; CHECK-AIX-64-P8-NEXT: lbz r3, 0(r3) -; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 -; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r5 -; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 -; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P8-NEXT: xxleqv v2, v2, v2 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: ld r5, L..C0(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r4 -; CHECK-AIX-64-P9-NEXT: lxsibzx f1, 0, r3 -; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r5) -; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: xxleqv v2, v2, v2 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r5, L..C0(r2) # %const.0 -; CHECK-AIX-32-P8-NEXT: lbz r4, 0(r4) -; CHECK-AIX-32-P8-NEXT: lbz r3, 0(r3) -; CHECK-AIX-32-P8-NEXT: 
mtvsrwz v3, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 -; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 -; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-32-P8-NEXT: xxleqv v2, v2, v2 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lwz r5, L..C0(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r4 -; CHECK-AIX-32-P9-NEXT: lxsibzx f1, 0, r3 -; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r5) -; CHECK-AIX-32-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: xxleqv v2, v2, v2 ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = load <1 x i8>, ptr %a, align 4 @@ -150,7 +121,7 @@ ; ; CHECK-AIX-64-P8-LABEL: test_v16i8_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 @@ -164,7 +135,7 @@ ; ; CHECK-AIX-32-P8-LABEL: test_v16i8_none: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 @@ -225,7 +196,7 @@ ; ; CHECK-AIX-64-P8-LABEL: test_none_v16i8: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C1(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 @@ -235,7 +206,7 @@ ; CHECK-AIX-64-P9-LABEL: test_none_v16i8: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 @@ 
-473,7 +444,7 @@ ; ; CHECK-AIX-64-P8-LABEL: test_none_v8i16: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 @@ -483,7 +454,7 @@ ; CHECK-AIX-64-P9-LABEL: test_none_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 @@ -547,7 +518,7 @@ ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 @@ -561,7 +532,7 @@ ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_none: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 @@ -783,9 +754,9 @@ ; ; CHECK-AIX-64-P8-LABEL: test_none_v4i32: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 -; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C5(r2) # %const.1 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 ; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 @@ -801,12 +772,12 @@ ; ; CHECK-AIX-32-P8-LABEL: test_none_v4i32: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 +; 
CHECK-AIX-32-P8-NEXT: lwz r3, L..C2(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) ; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 ; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 ; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 @@ -876,7 +847,7 @@ ; CHECK-AIX-64-P8-LABEL: test_v4i32_none: ; CHECK-AIX-64-P8: # %bb.0: # %entry ; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 -; CHECK-AIX-64-P8-NEXT: ld r5, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C6(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 ; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 ; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 @@ -887,7 +858,7 @@ ; CHECK-AIX-64-P9-LABEL: test_v4i32_none: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r4 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 @@ -897,7 +868,7 @@ ; CHECK-AIX-32-P8-LABEL: test_v4i32_none: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4 -; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C4(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 ; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 @@ -908,7 +879,7 @@ ; CHECK-AIX-32-P9-LABEL: test_v4i32_none: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7 @@ -1146,7 +1117,7 @@ ; CHECK-AIX-32-P8-LABEL: test_1_2: ; 
CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r5, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 @@ -1157,7 +1128,7 @@ ; CHECK-AIX-32-P9-LABEL: test_1_2: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: vspltb v3, v2, 7 ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3) @@ -1238,7 +1209,7 @@ ; CHECK-AIX-32-P8-LABEL: test_none_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry ; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C6(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 @@ -1249,7 +1220,7 @@ ; CHECK-AIX-32-P9-LABEL: test_none_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: vspltb v3, v2, 7 ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3) @@ -1360,74 +1331,64 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { ; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: mtvsrd v2, r3 -; CHECK-LE-P8-NEXT: mtvsrd v3, r4 -; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: slwi r4, r4, 16 +; CHECK-LE-P8-NEXT: or r3, r4, r3 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtvsrd v2, r3 -; CHECK-LE-P9-NEXT: mtvsrd v3, r4 -; 
CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: slwi r4, r4, 16 +; CHECK-LE-P9-NEXT: or r3, r4, r3 +; CHECK-LE-P9-NEXT: mtvsrws v2, r3 ; CHECK-LE-P9-NEXT: blr ; ; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha -; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 -; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l -; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 -; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 -; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: slwi r3, r3, 16 +; CHECK-BE-P8-NEXT: or r3, r3, r4 +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 ; CHECK-BE-P8-NEXT: blr ; ; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI16_0@toc@ha -; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 -; CHECK-BE-P9-NEXT: mtfprwz f1, r3 -; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI16_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs0, 0(r5) -; CHECK-BE-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-P9-NEXT: slwi r3, r3, 16 +; CHECK-BE-P9-NEXT: or r3, r3, r4 +; CHECK-BE-P9-NEXT: mtvsrws v2, r3 ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 -; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 -; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 -; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 -; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: slwi r3, r3, 16 +; CHECK-AIX-64-P8-NEXT: or r3, r3, r4 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 ; CHECK-AIX-64-P8-NEXT: blr ; ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 -; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r3 -; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r5) -; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: slwi r3, r3, 16 +; CHECK-AIX-64-P9-NEXT: or r3, 
r3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3 ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 -; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: slwi r3, r3, 16 +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: or r3, r3, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 ; CHECK-AIX-32-P8-NEXT: blr ; ; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) -; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: slwi r3, r3, 16 +; CHECK-AIX-32-P9-NEXT: or r3, r3, r4 +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) ; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) -; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-32-P9-NEXT: blr entry: %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 @@ -1793,7 +1754,7 @@ ; ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 ; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 @@ -1804,7 +1765,7 @@ ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: ; CHECK-AIX-32-P9: # %bb.0: # %entry ; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r4 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-32-P9-NEXT: xxperm v2, vs0, vs1 @@ -2027,7 +1988,7 @@ ; ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: ; CHECK-AIX-64-P8: # 
%bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C7(r2) # %const.0 ; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 ; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 ; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 @@ -2038,7 +1999,7 @@ ; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) @@ -2055,7 +2016,7 @@ ; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) ; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C8(r2) # %const.0 ; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 ; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 ; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 @@ -2070,7 +2031,7 @@ ; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) ; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) ; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 ; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) ; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs2, vs1 ; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll --- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll +++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll @@ -117,55 +117,45 @@ define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) { ; CHECK-LABEL: test8x24: ; CHECK: # %bb.0: -; CHECK-NEXT: mtvsrd v2, r3 -; CHECK-NEXT: mtvsrd v3, r4 -; CHECK-NEXT: mtvsrd v4, r5 -; CHECK-NEXT: mtvsrd v5, r6 -; CHECK-NEXT: mtvsrd v0, r7 -; CHECK-NEXT: mtvsrd v1, r8 -; CHECK-NEXT: vmrghh v2, v3, v2 -; CHECK-NEXT: mtvsrd v3, r9 -; CHECK-NEXT: vmrghh 
v4, v5, v4 -; CHECK-NEXT: mtvsrd v5, r10 -; CHECK-NEXT: vmrghh v0, v1, v0 -; CHECK-NEXT: vmrghh v3, v5, v3 -; CHECK-NEXT: xxmrglw vs0, v4, v2 -; CHECK-NEXT: xxmrglw vs1, v3, v0 -; CHECK-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-NEXT: slwi r6, r6, 16 +; CHECK-NEXT: slwi r4, r4, 16 +; CHECK-NEXT: slwi r10, r10, 16 +; CHECK-NEXT: slwi r8, r8, 16 +; CHECK-NEXT: or r5, r6, r5 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: or r4, r10, r9 +; CHECK-NEXT: or r6, r8, r7 +; CHECK-NEXT: rldimi r3, r5, 32, 0 +; CHECK-NEXT: rldimi r6, r4, 32, 0 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: mtfprd f1, r6 +; CHECK-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test8x24: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: sth r10, -16(r1) -; CHECK-BE-NEXT: sth r9, -32(r1) -; CHECK-BE-NEXT: sth r8, -48(r1) -; CHECK-BE-NEXT: sth r7, -64(r1) -; CHECK-BE-NEXT: sth r6, -80(r1) -; CHECK-BE-NEXT: sth r5, -96(r1) -; CHECK-BE-NEXT: sth r4, -112(r1) -; CHECK-BE-NEXT: sth r3, -128(r1) +; CHECK-BE-NEXT: slwi r9, r9, 16 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: slwi r3, r3, 16 +; CHECK-BE-NEXT: or r9, r9, r10 +; CHECK-BE-NEXT: or r7, r7, r8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: stw r9, -16(r1) +; CHECK-BE-NEXT: stw r7, -32(r1) +; CHECK-BE-NEXT: stw r5, -48(r1) +; CHECK-BE-NEXT: stw r3, -64(r1) ; CHECK-BE-NEXT: addi r3, r1, -16 -; CHECK-BE-NEXT: lxvw4x v2, 0, r3 +; CHECK-BE-NEXT: lxvw4x vs0, 0, r3 ; CHECK-BE-NEXT: addi r3, r1, -32 -; CHECK-BE-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-NEXT: lxvw4x vs1, 0, r3 ; CHECK-BE-NEXT: addi r3, r1, -48 -; CHECK-BE-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-NEXT: lxvw4x vs2, 0, r3 ; CHECK-BE-NEXT: addi r3, r1, -64 -; CHECK-BE-NEXT: lxvw4x v5, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -80 -; CHECK-BE-NEXT: lxvw4x v0, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -96 -; CHECK-BE-NEXT: lxvw4x v1, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -112 -; CHECK-BE-NEXT: lxvw4x v6, 0, r3 -; CHECK-BE-NEXT: addi r3, r1, -128 
-; CHECK-BE-NEXT: lxvw4x v7, 0, r3 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: vmrghh v3, v5, v4 -; CHECK-BE-NEXT: vmrghh v4, v1, v0 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: vmrghh v5, v7, v6 -; CHECK-BE-NEXT: xxmrghw vs1, v5, v4 +; CHECK-BE-NEXT: lxvw4x vs3, 0, r3 +; CHECK-BE-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-NEXT: xxmrghw vs1, vs3, vs2 ; CHECK-BE-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-BE-NEXT: blr %i11 = trunc i32 %i1 to i24 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -20,12 +20,8 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -33,37 +29,26 @@ ; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f2, 
vs0 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs2, vs1 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> @@ -75,87 +60,78 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghh v3, v4, v3 -; CHECK-P8-NEXT: vmrghh v2, v2, v5 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: rlwimi r5, r4, 16, 0, 15 +; 
CHECK-P8-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P8-NEXT: mtfprwz f0, r5 +; CHECK-P8-NEXT: mtfprwz f1, r6 +; CHECK-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvspdpn f1, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 1 +; CHECK-P9-NEXT: mtfprwz f0, r4 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mtfprwz f1, r4 +; CHECK-P9-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvspdpn 
f3, vs3 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: xxperm vs1, vs2, vs0 -; CHECK-BE-NEXT: xscvspdpn f2, v2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm vs3, vs2, vs0 -; CHECK-BE-NEXT: xxmrghw vs0, vs3, vs1 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xscvspdpn f0, v2 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %0 = fptoui <4 x float> %a to <4 x i16> @@ -166,159 +142,134 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f3, vs1 ; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f2, vs0 
+; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f4, v3 +; CHECK-P8-NEXT: xscvspdpn f1, v2 +; CHECK-P8-NEXT: xscvdpsxws f0, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r3, f2 ; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f6 -; CHECK-P8-NEXT: xscvdpsxws f2, f7 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v2, v2, v4 -; CHECK-P8-NEXT: vmrghh v3, v3, v5 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: vmrghh 
v4, v4, v0 -; CHECK-P8-NEXT: vmrghh v5, v5, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: rlwimi r7, r5, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r5, f5 +; CHECK-P8-NEXT: mffprwz r9, f0 +; CHECK-P8-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: rlwimi r9, r6, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r8, r3, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r5, r9, 32, 0 +; CHECK-P8-NEXT: rldimi r8, r7, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r5 +; CHECK-P8-NEXT: mtfprd f0, r8 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: 
xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; 
CHECK-BE-NEXT: xxperm vs3, vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f4, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs4, vs2 -; CHECK-BE-NEXT: xxswapd vs4, vs0 -; CHECK-BE-NEXT: xxmrghw vs1, vs1, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; CHECK-BE-NEXT: xxperm vs3, vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f4, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: 
xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs4, vs2 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs3 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, ptr %0, align 32 @@ -329,309 +280,268 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs5, 0, r4 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r5 +; CHECK-P8-NEXT: xscvspdpn f6, vs5 +; CHECK-P8-NEXT: xxswapd v5, vs5 +; CHECK-P8-NEXT: xscvspdpn f1, vs0 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f4, vs1 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f7, v2 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f5, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: 
xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f6, f9 -; CHECK-P8-NEXT: xscvspdpn f10, v4 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs2 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xxswapd v4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs4 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs8, v5, v5, 3 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f3, v0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: vmrghh v2, v2, v1 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: vmrghh v3, v3, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f10 -; CHECK-P8-NEXT: vmrghh v4, v5, v1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f3, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: mtvsrd v6, r4 ; CHECK-P8-NEXT: mffprwz r4, 
f1 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: mffprwz r6, f2 +; CHECK-P8-NEXT: xscvspdpn f2, v4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvdpsxws f1, f4 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f6 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, vs2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r8, f4 +; CHECK-P8-NEXT: xscvspdpn f4, v5 +; CHECK-P8-NEXT: mffprwz r10, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: mffprwz r11, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v6, v6, v8 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v5, v5, v9 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: vmrghh v7, v7, v8 -; CHECK-P8-NEXT: xxmrglw vs1, v6, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: vmrghh v0, v0, v8 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v7, v5 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxmrglw vs3, v0, v1 -; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: 
xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f8, f8 +; CHECK-P8-NEXT: mffprwz r30, f5 +; CHECK-P8-NEXT: mffprwz r0, f0 +; CHECK-P8-NEXT: mffprwz r28, f6 +; CHECK-P8-NEXT: rlwimi r30, r4, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: rlwimi r0, r9, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r12, f4 +; CHECK-P8-NEXT: rlwimi r28, r6, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r30, r0, 32, 0 +; CHECK-P8-NEXT: mffprwz r29, f3 +; CHECK-P8-NEXT: rlwimi r4, r7, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: mffprwz r6, f2 +; CHECK-P8-NEXT: rlwimi r29, r10, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: rlwimi r9, r11, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r28, r29, 32, 0 +; CHECK-P8-NEXT: rlwimi r6, r12, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r4, r9, 32, 0 +; CHECK-P8-NEXT: mtfprd f0, r30 +; CHECK-P8-NEXT: rlwimi r7, r8, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f1, r28 +; CHECK-P8-NEXT: rldimi r7, r6, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mtfprd f3, r7 +; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs0, 16(r4) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: xscvspdpn f4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 1 +; 
CHECK-P9-NEXT: xscvspdpn f3, vs2 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 16(r4) +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-P9-NEXT: rlwimi r4, r5, 16, 0, 15 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f4 +; CHECK-P9-NEXT: xxsldwi vs2, vs3, vs3, 1 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: rlwimi r5, r6, 16, 0, 15 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs1 +; CHECK-P9-NEXT: mffprwz r7, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: 
xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: vmrghh v5, v0, v5 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs3, vs0 -; CHECK-P9-NEXT: xscvspdpn f4, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mffprwz r6, f2 +; CHECK-P9-NEXT: mffprwz r8, f3 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: rlwimi r8, r6, 16, 0, 15 +; CHECK-P9-NEXT: mffprwz r6, f4 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r8, r7, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxmrglw vs3, v4, v5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: xxmrgld vs2, vs3, vs2 -; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtvsrdd vs2, r8, r5 ; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: xscvspdpn f3, 
vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: mffprwz r7, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r7, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs0, vs1, vs0 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r6, r7 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv vs3, 0(r5) -; CHECK-BE-NEXT: xscvspdpn f6, vs2 -; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 3 -; CHECK-BE-NEXT: xscvspdpn f9, vs1 -; CHECK-BE-NEXT: xxswapd vs5, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs7, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs8, vs1 -; CHECK-BE-NEXT: xxsldwi 
vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xxsldwi vs10, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs11, vs0 -; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 48(r4) +; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f9, f9 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f10, vs10 -; CHECK-BE-NEXT: xscvspdpn f11, vs11 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: lxv vs3, 0(r4) +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs2 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-BE-NEXT: rlwimi r4, r5, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: xscvdpsxws f8, f8 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f10, f10 -; CHECK-BE-NEXT: xscvdpsxws f11, f11 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtfprwz f6, r5 -; CHECK-BE-NEXT: mffprwz r5, f9 -; CHECK-BE-NEXT: mtfprwz f9, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtfprwz f4, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtfprwz f5, r5 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxperm vs4, vs5, vs3 -; CHECK-BE-NEXT: xscvspdpn f5, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtfprwz f2, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: mtfprwz f7, r5 -; CHECK-BE-NEXT: mffprwz r5, f8 -; CHECK-BE-NEXT: xxperm vs2, vs6, vs3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtfprwz f8, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; 
CHECK-BE-NEXT: xxmrghw vs2, vs2, vs4 -; CHECK-BE-NEXT: lxv vs4, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtfprwz f1, r5 -; CHECK-BE-NEXT: xxperm vs7, vs8, vs3 -; CHECK-BE-NEXT: mffprwz r5, f10 -; CHECK-BE-NEXT: xxperm vs1, vs9, vs3 -; CHECK-BE-NEXT: mtfprwz f10, r5 -; CHECK-BE-NEXT: mffprwz r5, f11 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f11, r5 -; CHECK-BE-NEXT: xxmrghw vs1, vs1, vs7 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs10, vs11, vs3 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-BE-NEXT: mtfprwz f0, r4 -; CHECK-BE-NEXT: xxperm vs0, vs5, vs3 -; CHECK-BE-NEXT: xxswapd vs5, vs4 +; CHECK-BE-NEXT: xxsldwi vs2, vs3, vs3, 1 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: rlwimi r5, r6, 16, 0, 15 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) -; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs10 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: mtfprwz f2, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs3 -; CHECK-BE-NEXT: xscvspdpn f5, vs4 -; CHECK-BE-NEXT: xxsldwi vs4, vs4, vs4, 1 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs1 +; CHECK-BE-NEXT: mffprwz r7, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs3 +; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: mtfprwz f4, r4 -; CHECK-BE-NEXT: xxperm vs4, vs5, vs3 -; 
CHECK-BE-NEXT: xxmrghw vs2, vs4, vs2 -; CHECK-BE-NEXT: xxmrghd vs0, vs2, vs0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: mffprwz r8, f3 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: rlwimi r8, r6, 16, 0, 15 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r8, r7, 32, 0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtvsrdd vs2, r8, r5 +; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: mffprwz r7, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-BE-NEXT: rlwimi r7, r6, 16, 0, 15 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r7, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r6, r7 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -651,13 +561,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghh v2, v3, v2 -; 
CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: @@ -669,33 +576,24 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f2, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs2, vs1 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> @@ -707,87 +605,84 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs0, v2, 
v2, 1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f2, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghh v3, v4, v3 -; CHECK-P8-NEXT: vmrghh v2, v2, v5 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 16 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: or r3, r4, r6 +; CHECK-P8-NEXT: mtfprwz f1, r3 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz 
r3, f0 -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: xxswapd vs1, v2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: xxperm vs1, vs2, vs0 -; CHECK-BE-NEXT: xscvspdpn f2, v2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm vs3, vs2, vs0 -; CHECK-BE-NEXT: xxmrghw vs0, vs3, vs1 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvspdpn f0, v2 +; CHECK-BE-NEXT: 
xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %0 = fptosi <4 x float> %a to <4 x i16> @@ -798,159 +693,146 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f3, vs1 ; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v3 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvspdpn f2, vs0 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f4, v3 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f0, v2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f0, 
f0 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 1 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f6 -; CHECK-P8-NEXT: xscvdpsxws f2, f7 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v2, v2, v4 -; CHECK-P8-NEXT: vmrghh v3, v3, v5 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: vmrghh v4, v4, v0 -; CHECK-P8-NEXT: vmrghh v5, v5, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: mffprwz r6, f4 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: slwi r5, r5, 16 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: or r5, r5, r7 +; CHECK-P8-NEXT: mffprwz r10, f5 +; CHECK-P8-NEXT: or r4, r4, r10 +; CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: or r6, r6, r9 +; CHECK-P8-NEXT: or r3, r3, r8 +; CHECK-P8-NEXT: rldimi r4, r6, 32, 0 +; CHECK-P8-NEXT: rldimi r3, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry ; 
CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r4, r5, r4 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; 
CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r3, r5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; CHECK-BE-NEXT: xxperm vs3, vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f4, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; 
CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs4, vs2 -; CHECK-BE-NEXT: xxswapd vs4, vs0 -; CHECK-BE-NEXT: xxmrghw vs1, vs1, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; CHECK-BE-NEXT: xxperm vs3, vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f4, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: or r3, r5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtfprwz f4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs4, vs2 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs3 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <8 x 
float>, ptr %0, align 32 @@ -961,206 +843,202 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs5, 0, r4 +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r5 +; CHECK-P8-NEXT: xscvspdpn f6, vs5 +; CHECK-P8-NEXT: xxswapd v5, vs5 +; CHECK-P8-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: xscvspdpn f1, vs0 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f4, vs1 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f7, v2 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f5, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: 
xscvdpsxws f6, f9 -; CHECK-P8-NEXT: xscvspdpn f10, v4 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs2 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xscvspdpn f0, vs4 +; CHECK-P8-NEXT: xxswapd v4, vs4 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs8, v5, v5, 3 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f3, v0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: vmrghh v2, v2, v1 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: vmrghh v3, v3, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f10 -; CHECK-P8-NEXT: vmrghh v4, v5, v1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f3, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: mtvsrd v6, r4 ; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: mffprwz r6, f2 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: xscvspdpn f2, v4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: mffprwz r8, f4 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: xscvspdpn f0, 
vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v5 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvdpsxws f1, f4 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f6 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, vs2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: slwi r9, r9, 16 +; CHECK-P8-NEXT: mffprwz r10, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: mffprwz r11, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 +; CHECK-P8-NEXT: slwi r10, r10, 16 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v6, v6, v8 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v5, v5, v9 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: vmrghh v7, v7, v8 -; CHECK-P8-NEXT: xxmrglw vs1, v6, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: vmrghh v0, v0, v8 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v7, v5 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxmrglw vs3, v0, v1 -; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f8, f8 +; CHECK-P8-NEXT: mffprwz r0, f0 +; CHECK-P8-NEXT: mffprwz r30, f5 +; CHECK-P8-NEXT: 
mffprwz r29, f3 +; CHECK-P8-NEXT: or r9, r9, r0 +; CHECK-P8-NEXT: mffprwz r28, f6 +; CHECK-P8-NEXT: or r4, r4, r30 +; CHECK-P8-NEXT: mffprwz r12, f4 +; CHECK-P8-NEXT: or r10, r10, r29 +; CHECK-P8-NEXT: rldimi r4, r9, 32, 0 +; CHECK-P8-NEXT: or r6, r6, r28 +; CHECK-P8-NEXT: mffprwz r27, f1 +; CHECK-P8-NEXT: mffprwz r0, f7 +; CHECK-P8-NEXT: rldimi r6, r10, 32, 0 +; CHECK-P8-NEXT: slwi r10, r11, 16 +; CHECK-P8-NEXT: mffprwz r9, f2 +; CHECK-P8-NEXT: or r10, r10, r27 +; CHECK-P8-NEXT: mffprwz r11, f8 +; CHECK-P8-NEXT: or r7, r7, r0 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: slwi r4, r8, 16 +; CHECK-P8-NEXT: slwi r8, r12, 16 +; CHECK-P8-NEXT: or r8, r8, r9 +; CHECK-P8-NEXT: or r4, r4, r11 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: rldimi r7, r10, 32, 0 +; CHECK-P8-NEXT: rldimi r4, r8, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r7 +; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs0, 16(r4) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xscvspdpn f4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 32(r4) +; CHECK-P9-NEXT: mffprwz r4, f4 +; 
CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r6, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs1, vs1, 1 +; CHECK-P9-NEXT: or r5, r5, r7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-P9-NEXT: or r4, r4, r7 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: or r6, r6, r7 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r8, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: slwi r8, r8, 16 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r7, r8, r7 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f4 +; CHECK-P9-NEXT: rldimi r7, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r8, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs3 +; CHECK-P9-NEXT: mffprwz r9, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs3, vs3, 3 +; CHECK-P9-NEXT: slwi r9, r9, 16 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r8, r9, r8 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: 
mffprwz r5, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: rldimi r8, r4, 32, 0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mffprwz r10, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 1 +; CHECK-P9-NEXT: mffprwz r9, f1 +; CHECK-P9-NEXT: mtvsrdd vs1, r8, r7 +; CHECK-P9-NEXT: slwi r10, r10, 16 +; CHECK-P9-NEXT: stxv vs1, 0(r3) +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r9, r10, r9 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r9, r6, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: vmrghh v5, v0, v5 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs3, vs0 -; CHECK-P9-NEXT: xscvspdpn f4, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 3 +; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: or r4, r5, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: mffprwz r4, 
f4 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxmrglw vs3, v4, v5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: xxmrgld vs2, vs3, vs2 -; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs0, vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r5, r9 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; @@ -1168,102 +1046,91 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) ; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv vs3, 0(r5) -; CHECK-BE-NEXT: xscvspdpn f6, vs2 +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: xscvspdpn f3, vs2 +; CHECK-BE-NEXT: xscvspdpn f4, vs1 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 
+; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: lxv vs3, 48(r4) +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r6, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 1 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs1, vs1, 1 +; CHECK-BE-NEXT: or r5, r5, r7 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-BE-NEXT: or r4, r4, r7 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r7, f4 ; CHECK-BE-NEXT: xxsldwi vs4, vs2, vs2, 3 -; CHECK-BE-NEXT: xscvspdpn f9, vs1 -; CHECK-BE-NEXT: xxswapd vs5, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs7, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs8, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xxsldwi vs10, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs11, vs0 -; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: or r6, r6, r7 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f9, f9 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r8, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: slwi r8, r8, 16 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f10, vs10 -; CHECK-BE-NEXT: xscvspdpn f11, vs11 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; 
CHECK-BE-NEXT: or r7, r8, r7 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: xscvdpsxws f8, f8 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f10, f10 -; CHECK-BE-NEXT: xscvdpsxws f11, f11 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtfprwz f6, r5 -; CHECK-BE-NEXT: mffprwz r5, f9 -; CHECK-BE-NEXT: mtfprwz f9, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtfprwz f4, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtfprwz f5, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxperm vs4, vs5, vs3 -; CHECK-BE-NEXT: xscvspdpn f5, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtfprwz f2, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: mtfprwz f7, r5 -; CHECK-BE-NEXT: mffprwz r5, f8 -; CHECK-BE-NEXT: xxperm vs2, vs6, vs3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtfprwz f8, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xxmrghw vs2, vs2, vs4 -; CHECK-BE-NEXT: lxv vs4, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtfprwz f1, r5 -; CHECK-BE-NEXT: xxperm vs7, vs8, vs3 -; CHECK-BE-NEXT: mffprwz r5, f10 -; CHECK-BE-NEXT: xxperm vs1, vs9, vs3 -; CHECK-BE-NEXT: mtfprwz f10, r5 -; CHECK-BE-NEXT: mffprwz r5, f11 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f11, r5 -; CHECK-BE-NEXT: xxmrghw vs1, vs1, vs7 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs10, vs11, vs3 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-BE-NEXT: mtfprwz f0, r4 -; CHECK-BE-NEXT: xxperm vs0, vs5, vs3 -; CHECK-BE-NEXT: xxswapd vs5, vs4 +; CHECK-BE-NEXT: rldimi r7, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r8, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs3 +; CHECK-BE-NEXT: mffprwz r9, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs3, vs3, 3 +; CHECK-BE-NEXT: slwi r9, r9, 16 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; 
CHECK-BE-NEXT: or r8, r9, r8 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r8, r4, 32, 0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r10, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r9, f1 +; CHECK-BE-NEXT: mtvsrdd vs1, r8, r7 +; CHECK-BE-NEXT: slwi r10, r10, 16 ; CHECK-BE-NEXT: stxv vs1, 0(r3) -; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r9, r10, r9 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs10 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: rldimi r9, r6, 32, 0 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: mtfprwz f2, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs3 -; CHECK-BE-NEXT: xscvspdpn f5, vs4 -; CHECK-BE-NEXT: xxsldwi vs4, vs4, vs4, 1 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: mtfprwz f4, r4 -; CHECK-BE-NEXT: xxperm vs4, vs5, vs3 -; CHECK-BE-NEXT: xxmrghw vs2, vs4, vs2 -; CHECK-BE-NEXT: xxmrghd vs0, vs2, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 +; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r5, r9 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll 
b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -19,15 +19,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghb v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 -; CHECK-P8-NEXT: sth r3, -2(r1) +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P8-NEXT: sth r4, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; @@ -36,40 +31,21 @@ ; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P9-NEXT: sth r4, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f2, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; 
CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: xxperm v2, vs2, vs1 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -82,89 +58,68 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: vmrghb v2, v2, v5 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi 
vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r3, r4, 8, 16, 23 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, v2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; 
CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-BE-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xscvspdpn f0, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-BE-NEXT: blr entry: %0 = fptoui <4 x float> %a to <4 x i8> @@ -175,54 +130,48 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f3, vs1 ; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f2, vs0 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f5, v3 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; 
CHECK-P8-NEXT: xscvspdpn f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 1 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f6 -; CHECK-P8-NEXT: xscvdpsxws f2, f7 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r6, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: rlwimi r6, r3, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r7, f4 +; CHECK-P8-NEXT: rlwimi r6, r7, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r6, r5, 24, 0, 7 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: mtfprwz f0, r6 +; CHECK-P8-NEXT: rlwimi r8, r4, 8, 16, 23 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghb v2, v2, v4 -; CHECK-P8-NEXT: vmrghb v3, v3, v5 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v4, v0 -; CHECK-P8-NEXT: vmrghb v5, v5, v1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: rlwimi r8, r3, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r8, r4, 24, 0, 7 +; CHECK-P8-NEXT: mtfprwz f1, r8 +; CHECK-P8-NEXT: 
xxmrghw vs0, vs0, vs1 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -231,51 +180,45 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtfprwz f1, r4 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 3 +; 
CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: mtfprwz f0, r4 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; @@ -283,55 +226,46 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v2, vs3, vs2 -; CHECK-BE-NEXT: xscvspdpn f3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws 
f3, f3 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs2 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, ptr %0, align 32 @@ -343,305 +277,256 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: 
test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: lxvd2x vs6, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v4, vs6 +; CHECK-P8-NEXT: xscvspdpn f2, vs0 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: lxvd2x vs4, r3, r4 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xscvspdpn f5, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvspdpn f6, vs3 +; CHECK-P8-NEXT: xscvspdpn f4, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xscvspdpn f5, vs3 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f8, v2 -; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v5 -; CHECK-P8-NEXT: xxswapd v7, vs4 +; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f0, vs6 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxswapd v2, vs3 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xscvspdpn f4, v3 +; CHECK-P8-NEXT: mffprwz r5, f5 +; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f5, vs7 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: 
mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: vmrghb v2, v4, v0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f5, f9 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f7, vs4 -; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: xscvdpsxws f5, f6 -; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f3, f4 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r9, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: rlwimi r9, r3, 8, 16, 23 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f1, v7 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r10, f4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 +; CHECK-P8-NEXT: mffprwz r6, f0 +; CHECK-P8-NEXT: xscvspdpn f0, v2 +; CHECK-P8-NEXT: rlwimi r9, r10, 16, 8, 15 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: rlwimi r9, r7, 24, 0, 7 +; CHECK-P8-NEXT: mffprwz r8, f3 +; CHECK-P8-NEXT: xscvspdpn f3, v4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f5 -; 
CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrghb v5, v0, v8 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f6 -; CHECK-P8-NEXT: vmrghb v3, v3, v6 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f7 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghb v0, v0, v6 -; CHECK-P8-NEXT: vmrghb v1, v1, v7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v6, v6, v8 -; CHECK-P8-NEXT: vmrghb v7, v7, v9 -; CHECK-P8-NEXT: vmrglh v2, v4, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mffprwz r11, f2 +; CHECK-P8-NEXT: mffprwz r0, f1 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: rlwimi r11, r4, 8, 16, 23 +; CHECK-P8-NEXT: rlwimi r0, r5, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: mffprwz r5, f7 +; CHECK-P8-NEXT: rlwimi r3, r6, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r12, f5 +; CHECK-P8-NEXT: rlwimi r0, r4, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: rlwimi r11, r12, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r11, r8, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r0, r4, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r3, r5, 24, 0, 7 +; CHECK-P8-NEXT: rldimi r11, r9, 32, 0 +; CHECK-P8-NEXT: 
rldimi r3, r0, 32, 0 +; CHECK-P8-NEXT: mtfprd f0, r11 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 1 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; 
CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: rlwimi r3, r5, 8, 16, 23 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: rlwimi r3, r5, 24, 0, 7 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: rldimi r3, r4, 32, 0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: rlwimi r5, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; 
CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: rlwimi r5, r4, 24, 0, 7 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: rlwimi r6, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs2 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r6, r4, 24, 0, 7 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs5, vs3, vs3, 3 -; CHECK-BE-NEXT: 
xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: xxswapd vs5, vs3 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: xxperm v2, vs5, vs4 -; CHECK-BE-NEXT: xscvspdpn f5, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: xxswapd vs4, vs3 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxperm v3, vs5, vs4 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; 
CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs3, vs4 -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xxmrghw vs2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: rlwimi r3, r5, 8, 16, 23 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v2, vs3, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: rlwimi r3, r5, 24, 0, 7 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: rldimi r3, r4, 32, 0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: rlwimi r5, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, 
vs0 +; CHECK-BE-NEXT: rlwimi r5, r4, 24, 0, 7 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs4 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: rlwimi r6, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r6, r4, 24, 0, 7 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, ptr %0, align 64 @@ -659,14 +544,10 @@ ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: vmrghb v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 8 +; CHECK-P8-NEXT: mffprwz r4, f1 +; 
CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr @@ -680,36 +561,18 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 8 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: sth r3, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f0, r3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f2, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: xxperm v2, vs2, vs1 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -722,89 +585,77 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v2 -; CHECK-P8-NEXT: xxswapd vs2, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn 
f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: vmrghb v2, v2, v5 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 8 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: or r3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; 
CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-NEXT: slwi r4, r4, 8 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxswapd vs1, v2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs0 -; CHECK-BE-NEXT: xscvspdpn f1, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xscvspdpn f0, v2 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 +; 
CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: blr entry: %0 = fptosi <4 x float> %a to <4 x i8> @@ -815,54 +666,54 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v3 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v2 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f6 -; CHECK-P8-NEXT: 
xscvdpsxws f2, f7 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: slwi r3, r3, 8 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: slwi r4, r4, 24 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: or r3, r4, r3 +; CHECK-P8-NEXT: mffprwz r6, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghb v2, v2, v4 -; CHECK-P8-NEXT: vmrghb v3, v3, v5 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v4, v0 -; CHECK-P8-NEXT: vmrghb v5, v5, v1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: or r3, r3, r6 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: slwi r5, r5, 24 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r5, r5, r7 +; CHECK-P8-NEXT: slwi r7, r8, 8 +; CHECK-P8-NEXT: or r5, r5, r7 +; CHECK-P8-NEXT: or r4, r5, r4 +; CHECK-P8-NEXT: mtfprwz f1, r4 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -871,51 +722,51 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 
-; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs0 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs0, vs0, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: 
xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; @@ -923,55 +774,52 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v2, vs3, vs2 -; CHECK-BE-NEXT: xscvspdpn f3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: 
xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs2 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, ptr %0, align 32 @@ -983,305 +831,296 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #3 { ; 
CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: lxvd2x vs5, 0, r3 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs4, r3, r4 +; CHECK-P8-NEXT: xscvspdpn f6, vs5 +; CHECK-P8-NEXT: xxswapd v5, vs5 +; CHECK-P8-NEXT: xscvspdpn f1, vs0 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xscvspdpn f3, vs2 ; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xscvspdpn f5, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvspdpn f6, vs3 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f8, v2 -; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, v5 -; CHECK-P8-NEXT: xxswapd v7, vs4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 +; CHECK-P8-NEXT: xxswapd v4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs4 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs8, v5, v5, 3 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: 
mtvsrd v3, r3 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f3, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: vmrghb v2, v4, v0 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mffprwz r6, f4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: slwi r6, r6, 8 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: slwi r3, r3, 8 +; CHECK-P8-NEXT: xscvspdpn f2, v4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: slwi r4, r4, 8 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f5, f9 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f7, vs4 -; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: slwi r5, r5, 8 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: slwi r7, r7, 24 +; CHECK-P8-NEXT: mffprwz r8, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: mffprwz r9, f2 ; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: xscvdpsxws f5, f6 -; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1 +; CHECK-P8-NEXT: slwi r8, r8, 24 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: slwi r9, r9, 24 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f1, v7 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; 
CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrghb v5, v0, v8 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f0, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f6 -; CHECK-P8-NEXT: vmrghb v3, v3, v6 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f7 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghb v0, v0, v6 -; CHECK-P8-NEXT: vmrghb v1, v1, v7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v6, v6, v8 -; CHECK-P8-NEXT: vmrghb v7, v7, v9 -; CHECK-P8-NEXT: vmrglh v2, v4, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: xscvdpsxws f8, f8 +; CHECK-P8-NEXT: mffprwz r11, f0 +; CHECK-P8-NEXT: mffprwz r0, f3 +; CHECK-P8-NEXT: mffprwz r10, f4 +; CHECK-P8-NEXT: slwi r11, r11, 16 +; CHECK-P8-NEXT: mffprwz r30, f1 +; CHECK-P8-NEXT: slwi r0, r0, 16 +; CHECK-P8-NEXT: or r7, r7, r11 +; CHECK-P8-NEXT: mffprwz r29, f2 +; CHECK-P8-NEXT: or r8, r8, r0 +; CHECK-P8-NEXT: slwi r10, r10, 24 +; CHECK-P8-NEXT: or r3, r7, r3 +; CHECK-P8-NEXT: slwi r0, r30, 16 +; CHECK-P8-NEXT: mffprwz r12, f5 +; CHECK-P8-NEXT: or r4, r8, r4 +; CHECK-P8-NEXT: slwi r30, r29, 16 +; 
CHECK-P8-NEXT: mffprwz r11, f6 +; CHECK-P8-NEXT: or r9, r9, r0 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r0, f7 +; CHECK-P8-NEXT: or r10, r10, r30 +; CHECK-P8-NEXT: or r5, r9, r5 +; CHECK-P8-NEXT: or r3, r3, r12 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: or r6, r10, r6 +; CHECK-P8-NEXT: or r4, r4, r11 +; CHECK-P8-NEXT: or r5, r5, r0 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: or r6, r6, r7 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xscvspdpn f4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz 
r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xscvspdpn f3, vs2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-P9-NEXT: slwi r4, r4, 24 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, 
f3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvspdpn f3, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-P9-NEXT: slwi r5, r5, 24 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-P9-NEXT: slwi r6, r6, 16 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvspdpn f1, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: or r5, r5, r6 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: 
mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs2 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: slwi r6, r6, 8 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs5, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: xxswapd vs5, vs3 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: xxperm v2, vs5, vs4 -; CHECK-BE-NEXT: xscvspdpn f5, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs3 +; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; 
CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xscvspdpn f3, vs2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 1 +; CHECK-BE-NEXT: slwi r4, r4, 24 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxperm v3, vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs2 -; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: or r4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs3, vs4 -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xxmrghw vs2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 
-; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm v2, vs3, vs4 -; CHECK-BE-NEXT: xscvspdpn f3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 3 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 1 +; CHECK-BE-NEXT: slwi r5, r5, 24 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; CHECK-BE-NEXT: slwi r6, r6, 16 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm v3, vs1, vs4 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: or r5, r5, r6 ; 
CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs2 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: slwi r6, r6, 8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, ptr %0, align 64 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -15,44 +15,29 @@ ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, v2 -; CHECK-BE-NEXT: 
addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 0, 15 ; CHECK-BE-NEXT: blr entry: %0 = fptoui <2 x double> %a to <2 x i16> @@ -64,25 +49,23 @@ ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: vmrghh v3, v5, v3 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r6, f3 +; CHECK-P8-NEXT: mffprwz r5, f2 +; CHECK-P8-NEXT: rlwimi r4, r6, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 0, 15 +; CHECK-P8-NEXT: mtfprwz f1, r4 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P8-NEXT: xxswapd vs0, vs0 
; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -95,19 +78,17 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 +; CHECK-P9-NEXT: xscvdpsxws f2, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mtfprwz f1, r4 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-P9-NEXT: mtfprwz f0, r4 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; @@ -115,27 +96,22 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f3, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs2 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs2 -; CHECK-BE-NEXT: xxmrghw 
vs0, vs0, vs1 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, ptr %0, align 32 @@ -147,137 +123,112 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f7, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f7, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v2, v0, v2 -; CHECK-P8-NEXT: vmrghh v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; 
CHECK-P8-NEXT: vmrghh v4, v0, v4 -; CHECK-P8-NEXT: vmrghh v5, v1, v5 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r7, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r8, f1 +; CHECK-P8-NEXT: mffprwz r9, f2 +; CHECK-P8-NEXT: mffprwz r7, f3 +; CHECK-P8-NEXT: rlwimi r4, r8, 16, 0, 15 +; CHECK-P8-NEXT: rlwimi r5, r9, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: rlwimi r6, r7, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f1 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: rlwimi r5, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; 
CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs2 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f5, f3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm vs3, vs5, vs4 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs4 -; 
CHECK-BE-NEXT: xxmrghw vs2, vs2, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f1 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: rlwimi r5, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs4 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs4 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs2 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: rlwimi r6, r3, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r6, r4, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, ptr %0, align 64 @@ -288,265 +239,224 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 +; CHECK-P8-NEXT: li r5, 80 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: lxvd2x vs11, 0, r4 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r29, -24(r1) # 
8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 112 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: xscvdpsxws v3, f11 +; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r5 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 -; CHECK-P8-NEXT: li r6, 112 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: lxvd2x vs9, r4, r5 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f9, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f11, f5 +; CHECK-P8-NEXT: xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f12, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: mffprwz r7, f4 -; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f13, f10 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f6, f4 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 +; CHECK-P8-NEXT: xscvdpsxws v2, f9 +; CHECK-P8-NEXT: xxswapd vs9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; 
CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxswapd vs6, vs10 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs4 -; CHECK-P8-NEXT: mtvsrd v2, r7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f6 -; CHECK-P8-NEXT: vmrghh v2, v8, v2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v3, v9, v3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghh v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: vmrghh v0, v8, v0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: vmrghh v1, v9, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v6, v8, v6 -; CHECK-P8-NEXT: vmrghh v7, v9, v7 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v1, v0 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxmrglw vs3, v7, v6 -; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mffprwz r0, f0 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: mffprwz r30, f1 +; CHECK-P8-NEXT: rlwimi r4, r0, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r9, f12 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: rlwimi r6, r30, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P8-NEXT: mffprwz r8, f10 +; CHECK-P8-NEXT: rlwimi r9, r0, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r10, f13 +; CHECK-P8-NEXT: mfvsrwz r11, v2 +; CHECK-P8-NEXT: mfvsrwz r12, v3 +; 
CHECK-P8-NEXT: mffprwz r29, f2 +; CHECK-P8-NEXT: mffprwz r28, f3 +; CHECK-P8-NEXT: mffprwz r30, f7 +; CHECK-P8-NEXT: rlwimi r7, r29, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r0, f9 +; CHECK-P8-NEXT: rlwimi r8, r28, 16, 0, 15 +; CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: rlwimi r10, r30, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r8, r7, 32, 0 +; CHECK-P8-NEXT: rlwimi r11, r0, 16, 0, 15 +; CHECK-P8-NEXT: rldimi r10, r9, 32, 0 +; CHECK-P8-NEXT: mtfprd f0, r6 +; CHECK-P8-NEXT: rlwimi r12, r4, 16, 0, 15 +; CHECK-P8-NEXT: mtfprd f1, r8 +; CHECK-P8-NEXT: rldimi r12, r11, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r10 +; CHECK-P8-NEXT: mtfprd f3, r12 +; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xscvdpsxws f4, f1 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f5, f0 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: lxv vs4, 48(r4) -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f7, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) +; CHECK-P9-NEXT: lxv vs7, 16(r4) +; CHECK-P9-NEXT: lxv vs6, 
0(r4) +; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: lxv vs4, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 +; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: lxv vs2, 64(r4) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: mffprwz r4, f8 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: xscvdpsxws f7, f6 +; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: mffprwz r6, f6 +; CHECK-P9-NEXT: xscvdpsxws f6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: mffprwz r7, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: rlwimi r7, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghh v2, v2, v5 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: vmrghh v3, v3, v5 -; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mffprwz r4, f5 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: xxmrglw vs6, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: mffprwz r8, f4 +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: rlwimi r8, r4, 16, 0, 15 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: rldimi r8, r7, 32, 0 ; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f7 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 +; CHECK-P9-NEXT: xscvdpsxws f4, f2 ; CHECK-P9-NEXT: xxswapd 
vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxmrglw vs4, v2, v4 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs4, vs4, vs6 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f1 +; CHECK-P9-NEXT: mffprwz r9, f3 +; CHECK-P9-NEXT: mtvsrdd vs3, r8, r6 +; CHECK-P9-NEXT: rlwimi r9, r4, 16, 0, 15 +; CHECK-P9-NEXT: stxv vs3, 0(r3) +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: stxv vs4, 0(r3) -; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rldimi r5, r9, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 0, 15 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs0, vs0, vs2 +; CHECK-P9-NEXT: mffprwz r7, f0 +; CHECK-P9-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-P9-NEXT: rldimi r7, r6, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r7, r5 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 48(r4) -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: lxv vs1, 80(r4) -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: xscvdpsxws f9, f7 +; CHECK-BE-NEXT: lxv vs7, 32(r4) +; CHECK-BE-NEXT: lxv vs6, 48(r4) +; 
CHECK-BE-NEXT: lxv vs5, 0(r4) +; CHECK-BE-NEXT: lxv vs4, 16(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: lxv vs4, 0(r4) -; CHECK-BE-NEXT: lxv vs5, 16(r4) -; CHECK-BE-NEXT: lxv vs6, 32(r4) -; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv vs8, 0(r4) +; CHECK-BE-NEXT: lxv vs3, 96(r4) +; CHECK-BE-NEXT: lxv vs0, 80(r4) +; CHECK-BE-NEXT: lxv vs1, 64(r4) +; CHECK-BE-NEXT: lxv vs2, 112(r4) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f7 -; CHECK-BE-NEXT: mtfprwz f7, r4 -; CHECK-BE-NEXT: xxperm vs7, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f6 +; CHECK-BE-NEXT: mffprwz r4, f8 +; CHECK-BE-NEXT: mffprwz r5, f7 +; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f6 -; CHECK-BE-NEXT: mtfprwz f6, r4 -; CHECK-BE-NEXT: xxperm vs6, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f5 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mffprwz r6, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xxmrghw vs6, vs6, vs7 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs5, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f4 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mffprwz r7, f5 +; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 +; CHECK-BE-NEXT: rlwimi r7, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: 
mffprwz r4, f4 -; CHECK-BE-NEXT: mtfprwz f4, r4 -; CHECK-BE-NEXT: xxperm vs4, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f3 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: mffprwz r8, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: rlwimi r8, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xxmrghw vs4, vs4, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 +; CHECK-BE-NEXT: rldimi r8, r7, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxperm vs3, vs9, vs8 -; CHECK-BE-NEXT: mtfprwz f2, r4 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs8 -; CHECK-BE-NEXT: xxmrghw vs2, vs2, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f1 +; CHECK-BE-NEXT: mffprwz r9, f3 +; CHECK-BE-NEXT: mtvsrdd vs3, r8, r6 +; CHECK-BE-NEXT: rlwimi r9, r4, 16, 0, 15 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtfprwz f1, r4 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs8 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 +; CHECK-BE-NEXT: rldimi r5, r9, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 0, 15 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, 
f0 -; CHECK-BE-NEXT: mtfprwz f0, r4 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs8 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs2 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r7, f0 +; CHECK-BE-NEXT: rlwimi r7, r4, 16, 0, 15 +; CHECK-BE-NEXT: rldimi r7, r6, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r7, r5 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -563,43 +473,31 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: slwi r3, r3, 16 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghh v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: xscvdpsxws f0, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: or r3, r4, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: xxswapd vs0, v2 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: 
mffprwz r3, f0 +; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: or r3, r4, r3 ; CHECK-BE-NEXT: blr entry: %0 = fptosi <2 x double> %a to <2 x i16> @@ -611,25 +509,25 @@ ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r6, f1 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: vmrghh v3, v5, v3 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: or r3, r4, r3 +; CHECK-P8-NEXT: slwi r4, r5, 16 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: mtfprwz f1, r4 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -638,23 +536,23 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs1, 0(r3) ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xscvdpsxws f2, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mffprwz r4, f1 +; 
CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxswapd vs2, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtfprwz f1, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs1 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; @@ -662,27 +560,24 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs2 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs2 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: 
mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, ptr %0, align 32 @@ -694,137 +589,124 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f7, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f7, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v2, v0, v2 -; CHECK-P8-NEXT: vmrghh v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; 
CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghh v4, v0, v4 -; CHECK-P8-NEXT: vmrghh v5, v1, v5 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: mffprwz r6, f0 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: slwi r6, r6, 16 +; CHECK-P8-NEXT: mffprwz r9, f3 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r3, r6, r3 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: or r4, r7, r4 +; CHECK-P8-NEXT: slwi r6, r8, 16 +; CHECK-P8-NEXT: mffprwz r10, f7 +; CHECK-P8-NEXT: slwi r7, r9, 16 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: or r5, r6, r5 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: or r6, r7, r10 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs0, vs1 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) +; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: 
mffprwz r3, f4 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: or r4, r5, r4 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs2 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r5, r3 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f5, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: 
mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: xxperm vs3, vs5, vs4 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs4 -; CHECK-BE-NEXT: xxmrghw vs2, vs2, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: or r3, r4, r3 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs4 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtfprwz f0, r3 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs4 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs2 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r5, r3 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; 
CHECK-BE-NEXT: blr entry: %a = load <8 x double>, ptr %0, align 64 @@ -835,265 +717,250 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 +; CHECK-P8-NEXT: li r5, 80 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r7, 80 +; CHECK-P8-NEXT: lxvd2x vs11, 0, r4 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: li r5, 112 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: li r6, 32 +; CHECK-P8-NEXT: xscvdpsxws v3, f11 +; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 -; CHECK-P8-NEXT: li r6, 112 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r5 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 ; CHECK-P8-NEXT: xscvdpsxws f6, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: lxvd2x vs9, r4, r5 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f9, f3 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f11, f5 -; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f12, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: mffprwz r7, f4 -; CHECK-P8-NEXT: lxvd2x 
vs4, r4, r6 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f13, f10 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f6, f4 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xxswapd vs5, vs5 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 +; CHECK-P8-NEXT: xscvdpsxws v2, f9 +; CHECK-P8-NEXT: xxswapd vs9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xxswapd vs6, vs10 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs4 -; CHECK-P8-NEXT: mtvsrd v2, r7 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f6 -; CHECK-P8-NEXT: vmrghh v2, v8, v2 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v3, v9, v3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghh v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f11, f11 +; CHECK-P8-NEXT: mffprwz r12, f0 +; CHECK-P8-NEXT: mffprwz r0, f1 ; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: vmrghh v0, v8, v0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; 
CHECK-P8-NEXT: vmrghh v1, v9, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v6, v8, v6 -; CHECK-P8-NEXT: vmrghh v7, v9, v7 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v1, v0 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxmrglw vs3, v7, v6 -; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: slwi r12, r12, 16 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: slwi r0, r0, 16 +; CHECK-P8-NEXT: mffprwz r30, f2 +; CHECK-P8-NEXT: or r4, r12, r4 +; CHECK-P8-NEXT: mffprwz r29, f3 +; CHECK-P8-NEXT: or r6, r0, r6 +; CHECK-P8-NEXT: mffprwz r7, f8 +; CHECK-P8-NEXT: slwi r30, r30, 16 +; CHECK-P8-NEXT: rldimi r6, r4, 32, 0 +; CHECK-P8-NEXT: mffprwz r8, f10 +; CHECK-P8-NEXT: slwi r29, r29, 16 +; CHECK-P8-NEXT: mffprwz r28, f5 +; CHECK-P8-NEXT: or r7, r30, r7 +; CHECK-P8-NEXT: mffprwz r27, f7 +; CHECK-P8-NEXT: or r8, r29, r8 +; CHECK-P8-NEXT: mffprwz r12, f9 +; CHECK-P8-NEXT: rldimi r8, r7, 32, 0 +; CHECK-P8-NEXT: slwi r0, r28, 16 +; CHECK-P8-NEXT: mffprwz r4, f11 +; CHECK-P8-NEXT: slwi r30, r27, 16 +; CHECK-P8-NEXT: mffprwz r9, f12 +; CHECK-P8-NEXT: mffprwz r10, f13 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: mfvsrwz r11, v2 +; CHECK-P8-NEXT: or r9, r0, r9 +; CHECK-P8-NEXT: mfvsrwz r7, v3 +; CHECK-P8-NEXT: or r10, r30, r10 +; CHECK-P8-NEXT: mtfprd f0, r6 +; CHECK-P8-NEXT: slwi r6, r12, 16 +; CHECK-P8-NEXT: rldimi r10, r9, 32, 0 +; CHECK-P8-NEXT: or r6, r6, r11 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: mtfprd f1, r8 +; CHECK-P8-NEXT: rldimi r4, r6, 32, 0 +; CHECK-P8-NEXT: mtfprd f2, r10 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded 
Reload +; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xscvdpsxws f4, f1 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f5, f0 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: lxv vs4, 48(r4) -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f7, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: lxv vs7, 16(r4) +; CHECK-P9-NEXT: lxv vs6, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 96(r4) +; CHECK-P9-NEXT: lxv vs1, 112(r4) +; CHECK-P9-NEXT: xxswapd vs8, vs7 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs2, 64(r4) +; CHECK-P9-NEXT: lxv vs3, 80(r4) +; CHECK-P9-NEXT: lxv vs4, 32(r4) +; CHECK-P9-NEXT: lxv vs5, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f8 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs6 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: slwi r4, r4, 16 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 +; CHECK-P9-NEXT: xxswapd vs6, vs5 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: mffprwz r6, f7 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; 
CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: rldimi r5, r4, 32, 0 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: mffprwz r7, f6 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: or r6, r6, r7 +; CHECK-P9-NEXT: mffprwz r7, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: slwi r7, r7, 16 +; CHECK-P9-NEXT: mffprwz r8, f5 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghh v2, v2, v5 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: vmrghh v3, v3, v5 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r4, f5 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: xxmrglw vs6, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f7 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: or r7, r7, r8 +; CHECK-P9-NEXT: mffprwz r8, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxmrglw vs4, v2, v4 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs4, vs4, vs6 +; CHECK-P9-NEXT: rldimi r7, r6, 32, 0 +; CHECK-P9-NEXT: slwi r8, r8, 16 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r9, f4 +; CHECK-P9-NEXT: or r8, r8, r9 ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: stxv vs4, 0(r3) -; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtvsrdd vs3, r7, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; 
CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: xxmrglw vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r4 +; CHECK-P9-NEXT: slwi r5, r5, 16 +; CHECK-P9-NEXT: stxv vs3, 0(r3) +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: or r4, r5, r4 +; CHECK-P9-NEXT: rldimi r4, r8, 32, 0 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld vs0, vs0, vs2 +; CHECK-P9-NEXT: slwi r6, r6, 16 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: or r5, r6, r5 +; CHECK-P9-NEXT: mffprwz r7, f0 +; CHECK-P9-NEXT: slwi r7, r7, 16 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: or r6, r7, r6 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd vs0, r6, r4 ; CHECK-P9-NEXT: stxv vs0, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 48(r4) -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: lxv vs1, 80(r4) -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: xscvdpsxws f9, f7 -; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: lxv vs4, 0(r4) -; CHECK-BE-NEXT: lxv vs5, 16(r4) -; CHECK-BE-NEXT: lxv vs6, 32(r4) -; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv vs8, 0(r4) +; CHECK-BE-NEXT: lxv vs7, 32(r4) +; CHECK-BE-NEXT: lxv vs6, 48(r4) +; CHECK-BE-NEXT: lxv vs0, 80(r4) +; CHECK-BE-NEXT: lxv vs1, 64(r4) +; CHECK-BE-NEXT: xxswapd vs8, vs7 ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz 
r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 +; CHECK-BE-NEXT: lxv vs2, 112(r4) +; CHECK-BE-NEXT: lxv vs3, 96(r4) +; CHECK-BE-NEXT: lxv vs4, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f8 ; CHECK-BE-NEXT: mffprwz r4, f7 -; CHECK-BE-NEXT: mtfprwz f7, r4 -; CHECK-BE-NEXT: xxperm vs7, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f6 -; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: xxswapd vs7, vs6 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f6 -; CHECK-BE-NEXT: mtfprwz f6, r4 -; CHECK-BE-NEXT: xxperm vs6, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f5 -; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: slwi r4, r4, 16 +; CHECK-BE-NEXT: mffprwz r5, f8 +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs5 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xxmrghw vs6, vs6, vs7 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: xxperm vs5, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs4 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: mffprwz r6, f7 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: mtfprwz f4, r4 -; CHECK-BE-NEXT: xxperm vs4, vs9, vs8 -; CHECK-BE-NEXT: xscvdpsxws f9, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: rldimi r5, r4, 32, 0 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: mffprwz r7, f6 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: or r6, r6, r7 +; CHECK-BE-NEXT: mffprwz r7, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: 
xxmrghw vs4, vs4, vs5 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: mffprwz r8, f5 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: or r7, r7, r8 +; CHECK-BE-NEXT: mffprwz r8, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6 -; CHECK-BE-NEXT: mffprwz r4, f9 -; CHECK-BE-NEXT: mtfprwz f9, r4 -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: rldimi r7, r6, 32, 0 +; CHECK-BE-NEXT: slwi r8, r8, 16 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r9, f4 +; CHECK-BE-NEXT: or r8, r8, r9 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: mtfprwz f5, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxperm vs3, vs9, vs8 -; CHECK-BE-NEXT: mtfprwz f2, r4 -; CHECK-BE-NEXT: xxperm vs2, vs5, vs8 -; CHECK-BE-NEXT: xxmrghw vs2, vs2, vs3 -; CHECK-BE-NEXT: xscvdpsxws f3, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: mtvsrdd vs3, r7, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtfprwz f1, r4 -; CHECK-BE-NEXT: xxperm vs1, vs3, vs8 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 +; CHECK-BE-NEXT: slwi r5, r5, 16 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: or r4, r5, r4 +; CHECK-BE-NEXT: rldimi r4, r8, 32, 0 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtfprwz f3, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtfprwz f0, r4 -; CHECK-BE-NEXT: xxperm vs0, vs3, vs8 -; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs1 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs2 +; CHECK-BE-NEXT: slwi r6, r6, 16 +; CHECK-BE-NEXT: 
xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: or r5, r6, r5 +; CHECK-BE-NEXT: mffprwz r7, f0 +; CHECK-BE-NEXT: slwi r7, r7, 16 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: or r6, r7, r6 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd vs0, r6, r4 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -16,14 +16,9 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghb v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 -; CHECK-P8-NEXT: sth r3, -2(r1) +; CHECK-P8-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P8-NEXT: sth r4, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; @@ -32,33 +27,17 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 0, 23 +; CHECK-P9-NEXT: sth r4, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f1 
-; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -70,28 +49,22 @@ define i32 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs0 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: vmrghb v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -101,50 +74,35 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 
-; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f3, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v2, vs3, vs2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v3, vs1, vs2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: rlwimi r3, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r3, r4, 24, 0, 7 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, ptr %0, align 32 @@ 
-156,48 +114,42 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f4, f0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f5, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f5, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f7, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r7, f0 +; CHECK-P8-NEXT: mffprwz r5, f6 +; CHECK-P8-NEXT: mffprwz r8, f2 +; CHECK-P8-NEXT: rlwimi r3, r7, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r7, f1 +; CHECK-P8-NEXT: rlwimi r5, r8, 8, 16, 23 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v2, v0, v2 -; CHECK-P8-NEXT: vmrghb v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v0, v4 -; CHECK-P8-NEXT: vmrghb v5, v1, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; 
CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: rlwimi r5, r6, 16, 8, 15 +; CHECK-P8-NEXT: rlwimi r3, r7, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r5, r4, 24, 0, 7 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: mtfprwz f1, r5 +; CHECK-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr @@ -206,91 +158,76 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvdpsxws f2, f1 +; CHECK-P9-NEXT: xscvdpsxws f3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: mtfprwz f2, r4 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 
+; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-P9-NEXT: mtfprwz f0, r4 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f5, f3 +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxperm v2, vs5, vs4 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz 
f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v3, vs2, vs4 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: rlwimi r4, r3, 24, 0, 7 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, ptr %0, align 64 @@ -302,261 +239,212 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 ; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 48 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: li r5, 96 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: lxvd2x vs5, r3, r5 +; CHECK-P8-NEXT: li r5, 64 ; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: li r4, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: lxvd2x vs5, r3, r4 -; CHECK-P8-NEXT: li r4, 80 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs9, r3, r5 ; CHECK-P8-NEXT: lxvd2x vs7, r3, r4 -; CHECK-P8-NEXT: li r4, 96 +; CHECK-P8-NEXT: li r4, 80 ; CHECK-P8-NEXT: xscvdpsxws f8, f2 ; CHECK-P8-NEXT: xxswapd vs2, 
vs2 -; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: lxvd2x vs11, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f10, f3 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mffprwz r11, f0 +; CHECK-P8-NEXT: mffprwz r5, f8 +; CHECK-P8-NEXT: mffprwz r7, f12 
+; CHECK-P8-NEXT: rlwimi r3, r11, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r12, f2 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: rlwimi r5, r12, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: rlwimi r7, r0, 8, 16, 23 +; CHECK-P8-NEXT: mffprwz r8, f13 +; CHECK-P8-NEXT: rlwimi r3, r4, 16, 8, 15 +; CHECK-P8-NEXT: mfvsrwz r9, v2 +; CHECK-P8-NEXT: rlwimi r5, r6, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r11, f9 +; CHECK-P8-NEXT: rlwimi r7, r8, 16, 8, 15 +; CHECK-P8-NEXT: mfvsrwz r10, v3 +; CHECK-P8-NEXT: mffprwz r12, f1 +; CHECK-P8-NEXT: rlwimi r9, r11, 8, 16, 23 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v8, v4 -; CHECK-P8-NEXT: vmrghb v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghb v0, v8, v0 -; CHECK-P8-NEXT: vmrghb v1, v9, v1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrghb v6, v8, v6 -; CHECK-P8-NEXT: vmrghb v2, v9, v2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v3, v8, v3 -; CHECK-P8-NEXT: vmrghb v7, v9, v7 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v2, v2, v6 -; CHECK-P8-NEXT: vmrglh v3, v7, v3 -; CHECK-P8-NEXT: xxmrglw vs0, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: rlwimi r9, r10, 16, 8, 15 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: rlwimi r3, r12, 24, 0, 7 +; CHECK-P8-NEXT: mffprwz r8, f11 +; CHECK-P8-NEXT: rlwimi r5, r4, 24, 0, 7 +; CHECK-P8-NEXT: rlwimi r7, r6, 24, 0, 7 +; CHECK-P8-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P8-NEXT: rlwimi r9, r8, 24, 0, 7 +; CHECK-P8-NEXT: mtfprd f0, r5 +; CHECK-P8-NEXT: rldimi r9, r7, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r9 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; ; 
CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: lxv vs7, 32(r3) +; CHECK-P9-NEXT: lxv vs6, 48(r3) +; CHECK-P9-NEXT: lxv vs5, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 80(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: lxv vs2, 80(r3) -; CHECK-P9-NEXT: lxv vs3, 64(r3) -; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: lxv vs1, 64(r3) +; CHECK-P9-NEXT: lxv vs2, 112(r3) +; CHECK-P9-NEXT: lxv vs3, 96(r3) +; CHECK-P9-NEXT: lxv vs4, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: xscvdpsxws f7, f6 -; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-P9-NEXT: mffprwz r3, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r4, r3, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mffprwz r5, f6 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: xxswapd vs5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: xscvdpsxws f5, f3 +; CHECK-P9-NEXT: rlwimi r3, r5, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 
+; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: rlwimi r3, r5, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs4, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: rldimi r3, r4, 32, 0 +; CHECK-P9-NEXT: mffprwz r4, f4 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: rlwimi r5, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: rlwimi r5, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: rlwimi r5, r4, 24, 0, 7 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: xscvdpsxws f1, f0 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; 
CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs4 +; CHECK-P9-NEXT: rlwimi r6, r4, 8, 16, 23 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: rlwimi r6, r4, 16, 8, 15 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: rlwimi r6, r4, 24, 0, 7 +; CHECK-P9-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r3 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xscvdpsxws f9, f7 +; CHECK-BE-NEXT: lxv vs7, 80(r3) +; CHECK-BE-NEXT: lxv vs6, 64(r3) +; CHECK-BE-NEXT: lxv vs5, 112(r3) +; CHECK-BE-NEXT: lxv vs0, 32(r3) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv vs8, 0(r3) +; CHECK-BE-NEXT: lxv vs1, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: lxv vs3, 16(r3) +; CHECK-BE-NEXT: lxv vs4, 96(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz r3, f9 -; CHECK-BE-NEXT: mtfprwz f9, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: xscvdpsxws f7, f6 -; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: mffprwz r3, f8 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs6 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: xxperm v2, vs9, vs8 +; CHECK-BE-NEXT: rlwimi r4, r3, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtfprwz f7, r3 +; CHECK-BE-NEXT: rlwimi r4, r3, 16, 8, 15 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: rlwimi 
r4, r3, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xxperm v3, vs7, vs8 -; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz f6, r3 +; CHECK-BE-NEXT: mffprwz r5, f6 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: xscvdpsxws f5, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxswapd vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xxperm v3, vs6, vs8 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs5, vs8 -; CHECK-BE-NEXT: xscvdpsxws f5, f3 +; CHECK-BE-NEXT: rlwimi r3, r5, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: rlwimi r3, r5, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: rlwimi r3, r5, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs4, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: rldimi r3, r4, 32, 0 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxperm v2, vs5, vs8 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: rlwimi r5, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: rlwimi r5, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: rlwimi r5, r4, 24, 0, 7 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xxperm v3, vs3, 
vs8 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: xscvdpsxws f1, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v3, vs2, vs8 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs8 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs4 +; CHECK-BE-NEXT: rlwimi r6, r4, 8, 16, 23 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: rlwimi r6, r4, 16, 8, 15 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: rlwimi r6, r4, 24, 0, 7 +; CHECK-BE-NEXT: rldimi r6, r5, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, ptr %0, align 128 @@ -571,49 +459,30 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: slwi r3, r3, 8 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vmrghb v2, v2, v3 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprd r3, f0 -; CHECK-P8-NEXT: clrldi r3, r3, 48 +; CHECK-P8-NEXT: or r3, r3, r4 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 -; 
CHECK-P9-NEXT: stxsihx v2, 0, r3 +; CHECK-P9-NEXT: xscvdpsxws f0, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r4, r3 +; CHECK-P9-NEXT: sth r3, -2(r1) ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvdpsxws f1, v2 -; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: xxperm v2, vs1, vs0 -; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 -; CHECK-BE-NEXT: stxsihx v2, 0, r3 +; CHECK-BE-NEXT: li r3, -1 +; CHECK-BE-NEXT: sth r3, -2(r1) ; CHECK-BE-NEXT: lhz r3, -2(r1) ; CHECK-BE-NEXT: blr entry: @@ -626,80 +495,68 @@ ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f2, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f3, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-NEXT: xxswapd vs3, vs1 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xxswapd vs2, vs0 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r5, f3 +; CHECK-P8-NEXT: slwi r4, r4, 16 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 
-; CHECK-P8-NEXT: vmrghb v3, v5, v3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: slwi r3, r3, 24 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: slwi r4, r5, 8 +; CHECK-P8-NEXT: mffprwz r5, f1 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: or r3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 16(r3) +; CHECK-P9-NEXT: lxv vs1, 16(r3) +; CHECK-P9-NEXT: lxv vs0, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f3, f1 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; 
CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v2, vs3, vs2 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: xxperm v3, vs1, vs2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: blr entry: %a = load <4 x double>, ptr %0, align 32 @@ -712,16 +569,16 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: lxvd2x vs3, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: xscvdpsxws f5, f1 ; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xscvdpsxws f6, f2 ; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xscvdpsxws f7, f3 @@ -731,121 +588,118 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: mffprwz r3, f6 -; CHECK-P8-NEXT: mtvsrd v3, 
r4 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v2, v0, v2 -; CHECK-P8-NEXT: vmrghb v3, v1, v3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: vmrghb v4, v0, v4 -; CHECK-P8-NEXT: vmrghb v5, v1, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: slwi r4, r4, 16 +; CHECK-P8-NEXT: mffprwz r7, f2 +; CHECK-P8-NEXT: slwi r5, r5, 24 +; CHECK-P8-NEXT: mffprwz r8, f3 +; CHECK-P8-NEXT: or r3, r5, r3 +; CHECK-P8-NEXT: slwi r6, r6, 8 +; CHECK-P8-NEXT: slwi r7, r7, 24 +; CHECK-P8-NEXT: mffprwz r5, f5 +; CHECK-P8-NEXT: or r3, r3, r6 +; CHECK-P8-NEXT: or r4, r7, r4 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: slwi r7, r8, 8 +; CHECK-P8-NEXT: or r4, r4, r7 +; CHECK-P8-NEXT: or r3, r3, r5 +; CHECK-P8-NEXT: or r4, r4, r6 +; CHECK-P8-NEXT: mtfprwz f0, r3 +; CHECK-P8-NEXT: mtfprwz f1, r4 +; CHECK-P8-NEXT: xxmrghw vs0, vs1, vs0 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs3, 16(r3) +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs1, 48(r3) +; CHECK-P9-NEXT: lxv vs0, 32(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: slwi r3, r3, 
24 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvdpsxws f2, f1 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: xscvdpsxws f3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r4, r4, 8 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtfprwz f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: 
addis r3, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv vs4, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f5, f3 +; CHECK-BE-NEXT: lxv vs3, 32(r3) +; CHECK-BE-NEXT: lxv vs2, 48(r3) +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxperm v2, vs5, vs4 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs4 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v3, vs2, vs4 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: mffprd r3, f0 +; CHECK-BE-NEXT: 
or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, ptr %0, align 64 @@ -857,261 +711,252 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r3 +; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: li r5, 96 ; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: li r4, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 -; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs7, r3, r5 +; CHECK-P8-NEXT: li r5, 64 ; CHECK-P8-NEXT: lxvd2x vs5, r3, r4 ; CHECK-P8-NEXT: li r4, 80 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: lxvd2x vs7, r3, r4 -; CHECK-P8-NEXT: li r4, 96 -; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 -; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: lxvd2x vs11, r3, r5 ; CHECK-P8-NEXT: xscvdpsxws f10, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: lxvd2x vs11, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs9, r3, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f8, f2 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xscvdpsxws f6, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: 
xscvdpsxws f12, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 -; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 +; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f8 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v0, r3 -; CHECK-P8-NEXT: mffprwz r3, f12 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghb v4, v8, v4 -; CHECK-P8-NEXT: vmrghb v5, v9, v5 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f5 -; CHECK-P8-NEXT: mffprwz r4, f7 -; CHECK-P8-NEXT: vmrghb v0, v8, v0 -; CHECK-P8-NEXT: vmrghb v1, v9, v1 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: mffprwz r3, f9 -; CHECK-P8-NEXT: mffprwz r4, f11 -; CHECK-P8-NEXT: vmrghb v6, v8, v6 -; CHECK-P8-NEXT: vmrghb v2, v9, v2 -; CHECK-P8-NEXT: mtvsrd v8, r3 -; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghb v3, v8, v3 -; 
CHECK-P8-NEXT: vmrghb v7, v9, v7 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v2, v2, v6 -; CHECK-P8-NEXT: vmrglh v3, v7, v3 -; CHECK-P8-NEXT: xxmrglw vs0, v5, v4 -; CHECK-P8-NEXT: xxmrglw vs1, v3, v2 -; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P8-NEXT: mffprwz r3, f4 +; CHECK-P8-NEXT: mffprwz r9, f0 +; CHECK-P8-NEXT: mffprwz r5, f8 +; CHECK-P8-NEXT: slwi r3, r3, 16 +; CHECK-P8-NEXT: mffprwz r11, f2 +; CHECK-P8-NEXT: slwi r9, r9, 24 +; CHECK-P8-NEXT: mffprwz r7, f12 +; CHECK-P8-NEXT: slwi r5, r5, 16 +; CHECK-P8-NEXT: or r3, r9, r3 +; CHECK-P8-NEXT: mfvsrwz r8, v2 +; CHECK-P8-NEXT: slwi r11, r11, 24 +; CHECK-P8-NEXT: mffprwz r10, f1 +; CHECK-P8-NEXT: slwi r7, r7, 16 +; CHECK-P8-NEXT: or r5, r11, r5 +; CHECK-P8-NEXT: mffprwz r12, f3 +; CHECK-P8-NEXT: slwi r8, r8, 16 +; CHECK-P8-NEXT: mffprwz r0, f5 +; CHECK-P8-NEXT: slwi r10, r10, 8 +; CHECK-P8-NEXT: mffprwz r29, f9 +; CHECK-P8-NEXT: slwi r12, r12, 8 +; CHECK-P8-NEXT: or r3, r3, r10 +; CHECK-P8-NEXT: mffprwz r30, f7 +; CHECK-P8-NEXT: slwi r11, r0, 24 +; CHECK-P8-NEXT: or r5, r5, r12 +; CHECK-P8-NEXT: mffprwz r9, f11 +; CHECK-P8-NEXT: slwi r0, r29, 24 +; CHECK-P8-NEXT: or r7, r11, r7 +; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: or r8, r0, r8 +; CHECK-P8-NEXT: slwi r12, r30, 8 +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: slwi r9, r9, 8 +; CHECK-P8-NEXT: or r7, r7, r12 +; CHECK-P8-NEXT: mffprwz r11, f13 +; CHECK-P8-NEXT: or r8, r8, r9 +; CHECK-P8-NEXT: or r3, r3, r4 +; CHECK-P8-NEXT: mfvsrwz r10, v3 +; CHECK-P8-NEXT: or r4, r5, r6 +; CHECK-P8-NEXT: or r5, r7, r11 +; CHECK-P8-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P8-NEXT: or r6, r8, r10 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: rldimi r6, r5, 32, 0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: xxmrghd v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; 
CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs7, 0(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) +; CHECK-P9-NEXT: lxv vs7, 48(r3) +; CHECK-P9-NEXT: lxv vs6, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 64(r3) +; CHECK-P9-NEXT: lxv vs1, 80(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 -; CHECK-P9-NEXT: lxv vs2, 80(r3) -; CHECK-P9-NEXT: lxv vs3, 64(r3) -; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) +; CHECK-P9-NEXT: lxv vs2, 96(r3) +; CHECK-P9-NEXT: lxv vs3, 112(r3) +; CHECK-P9-NEXT: lxv vs4, 0(r3) +; CHECK-P9-NEXT: lxv vs5, 16(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r4, f7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r4, r4, 16 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: slwi r4, r4, 8 +; CHECK-P9-NEXT: or r3, r3, r4 +; CHECK-P9-NEXT: mffprwz r4, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: or r3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: slwi r4, r4, 24 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f5 -; 
CHECK-P9-NEXT: xscvdpsxws f5, f3 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r4, r4, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: or r4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs4, v3, v2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: rldimi r4, r3, 32, 0 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: slwi r3, r3, 24 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: slwi r5, r5, 16 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 -; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: slwi r5, r5, 8 +; CHECK-P9-NEXT: or r3, r3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: or r3, r3, r5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: slwi r5, r5, 24 +; CHECK-P9-NEXT: mffprwz r6, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: slwi r6, r6, 16 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; 
CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P9-NEXT: xxmrgld v2, vs0, vs4 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: slwi r6, r6, 8 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: or r5, r5, r6 +; CHECK-P9-NEXT: rldimi r5, r3, 32, 0 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xscvdpsxws f9, f7 +; CHECK-BE-NEXT: lxv vs7, 64(r3) +; CHECK-BE-NEXT: lxv vs6, 80(r3) +; CHECK-BE-NEXT: lxv vs0, 48(r3) +; CHECK-BE-NEXT: lxv vs1, 32(r3) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) -; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv vs8, 0(r3) +; CHECK-BE-NEXT: lxv vs2, 16(r3) +; CHECK-BE-NEXT: lxv vs3, 0(r3) +; CHECK-BE-NEXT: lxv vs4, 112(r3) +; CHECK-BE-NEXT: lxv vs5, 96(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz r3, f9 -; CHECK-BE-NEXT: mtfprwz f9, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 +; CHECK-BE-NEXT: mffprwz r3, f8 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r4, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r4, r4, 16 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: xxperm v2, vs9, vs8 -; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtfprwz f7, r3 -; CHECK-BE-NEXT: mffprwz r3, f6 +; CHECK-BE-NEXT: or r3, r3, r4 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: slwi r4, r4, 8 +; CHECK-BE-NEXT: or r3, r3, r4 +; 
CHECK-BE-NEXT: mffprwz r4, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: or r3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xxperm v3, vs7, vs8 -; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz f6, r3 -; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: slwi r4, r4, 24 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xxperm v3, vs6, vs8 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs5, vs8 -; CHECK-BE-NEXT: xscvdpsxws f5, f3 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r4, r4, r5 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: or r4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs4, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtfprwz f5, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: rldimi r4, r3, 32, 0 +; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: slwi r3, r3, 24 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: slwi r5, r5, 16 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xxperm v2, vs5, vs8 -; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtfprwz f3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: slwi r5, r5, 8 +; CHECK-BE-NEXT: or r3, r3, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws 
f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: or r3, r3, r5 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xxperm v3, vs3, vs8 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtfprwz f2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: slwi r5, r5, 24 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: slwi r6, r6, 16 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: xxperm v3, vs2, vs8 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtfprwz f1, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xxperm v4, vs1, vs8 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xxmrghw vs0, v3, v2 -; CHECK-BE-NEXT: xxmrghd v2, vs0, vs4 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f1 +; CHECK-BE-NEXT: slwi r6, r6, 8 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: or r5, r5, r6 +; CHECK-BE-NEXT: rldimi r5, r3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, ptr %0, align 128 diff --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll --- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll +++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll @@ -288,91 +288,80 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) { ; CHECK-LE-LABEL: testInvalidExtend: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: li 3, 0 ; CHECK-LE-NEXT: li 4, 2 -; CHECK-LE-NEXT: li 5, 4 ; CHECK-LE-NEXT: li 6, 6 -; CHECK-LE-NEXT: vextubrx 3, 3, 2 +; CHECK-LE-NEXT: li 3, 0 +; CHECK-LE-NEXT: li 5, 4 ; CHECK-LE-NEXT: vextubrx 4, 4, 2 -; CHECK-LE-NEXT: vextubrx 5, 5, 2 ; CHECK-LE-NEXT: vextubrx 6, 6, 2 -; CHECK-LE-NEXT: li 7, 8 +; CHECK-LE-NEXT: vextubrx 3, 3, 2 +; CHECK-LE-NEXT: vextubrx 5, 5, 2 ; CHECK-LE-NEXT: li 8, 10 -; 
CHECK-LE-NEXT: li 9, 12 ; CHECK-LE-NEXT: li 10, 14 -; CHECK-LE-NEXT: extsb 3, 3 +; CHECK-LE-NEXT: li 7, 8 +; CHECK-LE-NEXT: li 9, 12 ; CHECK-LE-NEXT: extsb 4, 4 -; CHECK-LE-NEXT: extsb 5, 5 ; CHECK-LE-NEXT: extsb 6, 6 -; CHECK-LE-NEXT: vextubrx 7, 7, 2 ; CHECK-LE-NEXT: vextubrx 8, 8, 2 -; CHECK-LE-NEXT: extsb 7, 7 +; CHECK-LE-NEXT: vextubrx 10, 10, 2 +; CHECK-LE-NEXT: extsb 3, 3 +; CHECK-LE-NEXT: extsb 5, 5 ; CHECK-LE-NEXT: extsb 8, 8 -; CHECK-LE-NEXT: mtvsrd 35, 4 +; CHECK-LE-NEXT: extsb 10, 10 +; CHECK-LE-NEXT: slwi 6, 6, 16 +; CHECK-LE-NEXT: slwi 4, 4, 16 +; CHECK-LE-NEXT: vextubrx 7, 7, 2 ; CHECK-LE-NEXT: vextubrx 9, 9, 2 -; CHECK-LE-NEXT: vextubrx 10, 10, 2 -; CHECK-LE-NEXT: mtvsrd 34, 3 -; CHECK-LE-NEXT: mtvsrd 36, 6 +; CHECK-LE-NEXT: extsb 7, 7 ; CHECK-LE-NEXT: extsb 9, 9 -; CHECK-LE-NEXT: extsb 10, 10 -; CHECK-LE-NEXT: vmrghh 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 5 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: mtvsrd 36, 10 -; CHECK-LE-NEXT: xxmrglw 0, 35, 34 -; CHECK-LE-NEXT: mtvsrd 34, 7 -; CHECK-LE-NEXT: mtvsrd 35, 8 -; CHECK-LE-NEXT: vmrghh 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 9 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: xxmrglw 1, 35, 34 -; CHECK-LE-NEXT: xxmrgld 34, 1, 0 +; CHECK-LE-NEXT: or 5, 6, 5 +; CHECK-LE-NEXT: or 3, 4, 3 +; CHECK-LE-NEXT: slwi 4, 10, 16 +; CHECK-LE-NEXT: rldimi 3, 5, 32, 0 +; CHECK-LE-NEXT: slwi 5, 8, 16 +; CHECK-LE-NEXT: or 4, 4, 9 +; CHECK-LE-NEXT: or 5, 5, 7 +; CHECK-LE-NEXT: rldimi 5, 4, 32, 0 +; CHECK-LE-NEXT: mtvsrdd 34, 5, 3 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: testInvalidExtend: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: li 9, 12 -; CHECK-BE-NEXT: li 10, 14 +; CHECK-BE-NEXT: li 3, 0 ; CHECK-BE-NEXT: li 7, 8 -; CHECK-BE-NEXT: li 8, 10 -; CHECK-BE-NEXT: vextublx 9, 9, 2 -; CHECK-BE-NEXT: vextublx 10, 10, 2 -; CHECK-BE-NEXT: vextublx 7, 7, 2 -; CHECK-BE-NEXT: vextublx 8, 8, 2 +; CHECK-BE-NEXT: li 4, 2 ; CHECK-BE-NEXT: li 5, 4 +; CHECK-BE-NEXT: vextublx 3, 3, 2 +; CHECK-BE-NEXT: vextublx 7, 7, 2 +; 
CHECK-BE-NEXT: li 8, 10 +; CHECK-BE-NEXT: li 9, 12 +; CHECK-BE-NEXT: vextublx 4, 4, 2 ; CHECK-BE-NEXT: li 6, 6 -; CHECK-BE-NEXT: li 3, 0 -; CHECK-BE-NEXT: li 4, 2 -; CHECK-BE-NEXT: extsb 9, 9 -; CHECK-BE-NEXT: extsb 10, 10 -; CHECK-BE-NEXT: extsb 7, 7 -; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: li 10, 14 +; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: vextublx 5, 5, 2 +; CHECK-BE-NEXT: vextublx 8, 8, 2 +; CHECK-BE-NEXT: vextublx 9, 9, 2 +; CHECK-BE-NEXT: extsb 7, 7 ; CHECK-BE-NEXT: vextublx 6, 6, 2 +; CHECK-BE-NEXT: extsb 4, 4 ; CHECK-BE-NEXT: extsb 5, 5 +; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: extsb 9, 9 ; CHECK-BE-NEXT: extsb 6, 6 -; CHECK-BE-NEXT: mtfprwz 1, 9 -; CHECK-BE-NEXT: addis 9, 2, .LCPI11_0@toc@ha -; CHECK-BE-NEXT: mtfprwz 0, 10 -; CHECK-BE-NEXT: mtfprwz 3, 7 -; CHECK-BE-NEXT: vextublx 3, 3, 2 -; CHECK-BE-NEXT: extsb 3, 3 -; CHECK-BE-NEXT: mtfprwz 4, 3 -; CHECK-BE-NEXT: addi 9, 9, .LCPI11_0@toc@l -; CHECK-BE-NEXT: vextublx 4, 4, 2 -; CHECK-BE-NEXT: extsb 4, 4 -; CHECK-BE-NEXT: lxv 2, 0(9) -; CHECK-BE-NEXT: xxperm 0, 1, 2 -; CHECK-BE-NEXT: mtfprwz 1, 8 -; CHECK-BE-NEXT: xxperm 1, 3, 2 -; CHECK-BE-NEXT: mtfprwz 3, 5 -; CHECK-BE-NEXT: xxmrghw 0, 1, 0 -; CHECK-BE-NEXT: mtfprwz 1, 6 -; CHECK-BE-NEXT: xxperm 1, 3, 2 -; CHECK-BE-NEXT: mtfprwz 3, 4 -; CHECK-BE-NEXT: xxperm 3, 4, 2 -; CHECK-BE-NEXT: xxmrghw 1, 3, 1 -; CHECK-BE-NEXT: xxmrghd 34, 1, 0 +; CHECK-BE-NEXT: slwi 7, 7, 16 +; CHECK-BE-NEXT: vextublx 10, 10, 2 +; CHECK-BE-NEXT: slwi 3, 3, 16 +; CHECK-BE-NEXT: extsb 10, 10 +; CHECK-BE-NEXT: or 7, 7, 8 +; CHECK-BE-NEXT: slwi 8, 9, 16 +; CHECK-BE-NEXT: or 3, 3, 4 +; CHECK-BE-NEXT: slwi 4, 5, 16 +; CHECK-BE-NEXT: or 8, 8, 10 +; CHECK-BE-NEXT: or 4, 4, 6 +; CHECK-BE-NEXT: rldimi 8, 7, 32, 0 +; CHECK-BE-NEXT: rldimi 4, 3, 32, 0 +; CHECK-BE-NEXT: mtvsrdd 34, 4, 8 ; CHECK-BE-NEXT: blr entry: