diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14552,6 +14552,11 @@ // builtins) into loads with swaps. SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const { + // Delay VSX load for LE combine until after LegalizeOps to prioritize other + // load combines. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; @@ -14586,13 +14591,6 @@ MVT VecTy = N->getValueType(0).getSimpleVT(); - // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is - // aligned and the type is a vector with elements up to 4 bytes - if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) && - VecTy.getScalarSizeInBits() <= 32) { - return SDValue(); - } - SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), @@ -14620,6 +14618,11 @@ // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { + // Delay VSX store for LE combine until after LegalizeOps to prioritize other + // store combines. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; @@ -14657,13 +14660,6 @@ SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); - // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is - // aligned and the type is a vector with elements up to 4 bytes - if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) && - VecTy.getScalarSizeInBits() <= 32) { - return SDValue(); - } - // All stores are done as v2f64 and possible bit cast. 
if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); diff --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll --- a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll +++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll @@ -25,12 +25,12 @@ ; CHECK-LE-NEXT: addis r4, r2, .LCPI0_1@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-LE-NEXT: addi r4, r4, .LCPI0_1@toc@l -; CHECK-LE-NEXT: lvx v2, 0, r3 -; CHECK-LE-NEXT: lvx v3, 0, r4 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs1, 0, r4 ; CHECK-LE-NEXT: addi r4, r1, 48 ; CHECK-LE-NEXT: addi r3, r1, 32 -; CHECK-LE-NEXT: stvx v2, 0, r4 -; CHECK-LE-NEXT: stvx v3, 0, r3 +; CHECK-LE-NEXT: stxvd2x vs0, 0, r4 +; CHECK-LE-NEXT: stxvd2x vs1, 0, r3 ; CHECK-LE-NEXT: bl test ; CHECK-LE-NEXT: nop ; CHECK-LE-NEXT: lwa r3, 32(r1) @@ -99,9 +99,9 @@ ; CHECK-LE-NEXT: stdux r1, r1, r0 ; CHECK-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-LE-NEXT: lvx v2, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: addi r3, r1, 32 -; CHECK-LE-NEXT: stvx v2, 0, r3 +; CHECK-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: bl test1 ; CHECK-LE-NEXT: nop ; CHECK-LE-NEXT: lwa r3, 32(r1) @@ -158,7 +158,69 @@ ; CHECK-OPT-NEXT: entry: ; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2 ; CHECK-OPT: store <16 x i16> [[TMP0:%.*]], <16 x i16>* [[TMP0:%.*]], align 2 - +; CHECK-LE-LABEL: test_Array: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mflr r0 +; CHECK-LE-NEXT: std r0, 16(r1) +; CHECK-LE-NEXT: stdu r1, -176(r1) +; CHECK-LE-NEXT: addis r4, r2, Arr1@toc@ha +; CHECK-LE-NEXT: li r3, 0 +; CHECK-LE-NEXT: li r6, 65 +; CHECK-LE-NEXT: addi r5, r1, 46 +; CHECK-LE-NEXT: addi r4, r4, Arr1@toc@l +; CHECK-LE-NEXT: stw r3, 44(r1) +; CHECK-LE-NEXT: addi r4, r4, -1 +; CHECK-LE-NEXT: mtctr r6 +; CHECK-LE-NEXT: bdz .LBB2_2 +; CHECK-LE-NEXT: .p2align 5 +; CHECK-LE-NEXT: .LBB2_1: # %for.body +; CHECK-LE-NEXT: # 
+; CHECK-LE-NEXT: lbz r6, 1(r4) +; CHECK-LE-NEXT: addi r7, r5, 2 +; CHECK-LE-NEXT: addi r4, r4, 1 +; CHECK-LE-NEXT: addi r3, r3, 1 +; CHECK-LE-NEXT: sth r6, 2(r5) +; CHECK-LE-NEXT: mr r5, r7 +; CHECK-LE-NEXT: bdnz .LBB2_1 +; CHECK-LE-NEXT: .LBB2_2: # %for.cond.cleanup +; CHECK-LE-NEXT: addi r3, r1, 48 +; CHECK-LE-NEXT: bl test_arr +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: addi r1, r1, 176 +; CHECK-LE-NEXT: ld r0, 16(r1) +; CHECK-LE-NEXT: mtlr r0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_Array: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: stdu r1, -256(r1) +; CHECK-BE-NEXT: addis r5, r2, Arr1@toc@ha +; CHECK-BE-NEXT: li r3, 0 +; CHECK-BE-NEXT: addi r5, r5, Arr1@toc@l +; CHECK-BE-NEXT: addi r4, r1, 126 +; CHECK-BE-NEXT: li r6, 65 +; CHECK-BE-NEXT: stw r3, 124(r1) +; CHECK-BE-NEXT: addi r5, r5, -1 +; CHECK-BE-NEXT: mtctr r6 +; CHECK-BE-NEXT: bdz .LBB2_2 +; CHECK-BE-NEXT: .LBB2_1: # %for.body +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: lbz r6, 1(r5) +; CHECK-BE-NEXT: addi r5, r5, 1 +; CHECK-BE-NEXT: addi r3, r3, 1 +; CHECK-BE-NEXT: sth r6, 2(r4) +; CHECK-BE-NEXT: addi r4, r4, 2 +; CHECK-BE-NEXT: bdnz .LBB2_1 +; CHECK-BE-NEXT: .LBB2_2: # %for.cond.cleanup +; CHECK-BE-NEXT: addi r3, r1, 128 +; CHECK-BE-NEXT: bl test_arr +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: addi r1, r1, 256 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr entry: %Arr2 = alloca [64 x i16], align 2 %i = alloca i32, align 4 diff --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll --- a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -13,8 +13,9 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8: lfiwzx f0, 0, r3 ; CHECK-P8: ld r4, .LC0@toc@l(r4) -; CHECK-P8: xxspltw v2, vs0, 1 -; CHECK-P8: stvx v2, 0, r4 +; CHECK-P8: xxspltw vs0, vs0, 1 +; CHECK-P8: xxswapd vs0, vs0 +; CHECK-P8: stxvd2x vs0, 0, r4 ; CHECK-P8: lis
r4, 1024 ; CHECK-P8: lfiwax f0, 0, r3 ; CHECK-P8: addis r3, r2, .LC1@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll --- a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll @@ -47,11 +47,13 @@ ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: mullw 4, 4, 5 ; CHECK-NEXT: vsplth 2, 2, 3 -; CHECK-NEXT: stvx 2, 0, 3 +; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: neg 4, 4 ; CHECK-NEXT: mtvsrd 35, 4 +; CHECK-NEXT: stxvd2x 0, 0, 3 ; CHECK-NEXT: vsplth 3, 3, 3 -; CHECK-NEXT: stvx 3, 0, 3 +; CHECK-NEXT: xxswapd 1, 35 +; CHECK-NEXT: stxvd2x 1, 0, 3 bb: br i1 undef, label %bb22, label %bb3 diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -879,7 +879,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> @@ -947,12 +948,13 @@ ; ; P8LE-LABEL: fromDiffMemConsDi: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI7_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI7_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI7_0@toc@l +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 @@ -1059,10 +1061,11 @@ ; P8LE-NEXT: sldi r4, r4, 2 ; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha ; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l ; P8LE-NEXT: addi r3, r3, -12 +; P8LE-NEXT: lxvd2x vs1, 0, r4 ; P8LE-NEXT: lxvd2x vs0, 0, r3 -; 
P8LE-NEXT: addi r3, r5, .LCPI9_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 +; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr @@ -1404,7 +1407,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI16_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI16_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> @@ -1475,12 +1479,13 @@ ; ; P8LE-LABEL: fromDiffMemConsDConvftoi: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI18_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspsxws v2, v2 ; P8LE-NEXT: blr entry: @@ -1858,7 +1863,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI25_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI25_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> @@ -2398,7 +2404,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI37_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI37_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> @@ -2466,12 +2473,13 @@ ; ; P8LE-LABEL: fromDiffMemConsDui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI39_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI39_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI39_0@toc@l +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; 
P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 @@ -2578,10 +2586,11 @@ ; P8LE-NEXT: sldi r4, r4, 2 ; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha ; P8LE-NEXT: add r3, r3, r4 +; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l ; P8LE-NEXT: addi r3, r3, -12 +; P8LE-NEXT: lxvd2x vs1, 0, r4 ; P8LE-NEXT: lxvd2x vs0, 0, r3 -; P8LE-NEXT: addi r3, r5, .LCPI41_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 +; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr @@ -2923,7 +2932,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI48_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI48_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> @@ -2994,12 +3004,13 @@ ; ; P8LE-LABEL: fromDiffMemConsDConvftoui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha -; P8LE-NEXT: addi r3, r4, .LCPI50_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: vperm v2, v3, v3, v2 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspuxws v2, v2 ; P8LE-NEXT: blr entry: @@ -3378,7 +3389,8 @@ ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI57_0@toc@ha ; P8LE-NEXT: addi r3, r3, .LCPI57_0@toc@l -; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: blr entry: ret <4 x i32> diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -83,7 +83,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-P7-NEXT: addi 
r3, r3, .LCPI1_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -157,7 +158,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -231,7 +233,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -305,7 +308,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI7_0@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -379,7 +383,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI9_0@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI9_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -453,7 +458,8 @@ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis r3, r2, .LCPI11_0@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI11_0@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P7-NEXT: xxswapd v4, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr ; @@ -511,10 +517,11 @@ ; CHECK-P7-NEXT: xxlxor v4, v4, v4 ; CHECK-P7-NEXT: std r3, -16(r1) ; CHECK-P7-NEXT: addis r3, r2, .LCPI12_0@toc@ha -; CHECK-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P7-NEXT: addi r3, r3, .LCPI12_0@toc@l -; CHECK-P7-NEXT: lvx v3, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, 
r4 +; CHECK-P7-NEXT: lxvd2x vs1, 0, r3 ; CHECK-P7-NEXT: xxswapd v2, vs0 +; CHECK-P7-NEXT: xxswapd v3, vs1 ; CHECK-P7-NEXT: vperm v2, v2, v4, v3 ; CHECK-P7-NEXT: blr ; @@ -691,8 +698,10 @@ ; CHECK-P7-NEXT: stw r3, -16(r1) ; CHECK-P7-NEXT: addi r3, r1, -16 ; CHECK-P7-NEXT: addi r4, r4, .LCPI14_0@toc@l -; CHECK-P7-NEXT: lvx v3, 0, r3 -; CHECK-P7-NEXT: lvx v2, 0, r4 +; CHECK-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P7-NEXT: xxswapd v3, vs1 +; CHECK-P7-NEXT: xxswapd v2, vs0 ; CHECK-P7-NEXT: vperm v2, v3, v3, v2 ; CHECK-P7-NEXT: blr ; @@ -722,7 +731,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vmrgow v2, v3, v2 ; CHECK-P8-NEXT: blr ; @@ -761,9 +771,11 @@ ; CHECK-P7-NEXT: addis r3, r2, .LCPI15_0@toc@ha ; CHECK-P7-NEXT: addis r4, r2, .LCPI15_1@toc@ha ; CHECK-P7-NEXT: addi r3, r3, .LCPI15_0@toc@l -; CHECK-P7-NEXT: lvx v3, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P7-NEXT: addi r3, r4, .LCPI15_1@toc@l -; CHECK-P7-NEXT: lvx v4, 0, r3 +; CHECK-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P7-NEXT: xxswapd v3, vs0 +; CHECK-P7-NEXT: xxswapd v4, vs1 ; CHECK-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-P7-NEXT: blr ; @@ -1096,8 +1108,8 @@ define <2 x i64> @testSplati64_1(<2 x i64>* nocapture readonly %ptr) #0 { ; CHECK-P8-LABEL: testSplati64_1: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: xxspltd v2, vs0, 1 +; CHECK-P8-NEXT: addi r3, r3, 8 +; CHECK-P8-NEXT: lxvdsx v2, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testSplati64_1: @@ -1128,8 +1140,8 @@ ; ; CHECK-P7-LABEL: testSplati64_1: ; CHECK-P7: # %bb.0: # %entry -; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P7-NEXT: xxspltd v2, vs0, 1 +; CHECK-P7-NEXT: addi r3, r3, 8 +; CHECK-P7-NEXT: lxvdsx v2, 0, r3 ; CHECK-P7-NEXT: blr ; ; P8-AIX-LABEL: testSplati64_1: @@ -1149,7 
+1161,8 @@ ; CHECK-P8-NEXT: lbzx r3, 0, r3 ; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: vspltb v2, v2, 7 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: testByteSplat: @@ -1182,7 +1195,8 @@ ; CHECK-P7-NEXT: lvx v3, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v3, v2 ; CHECK-P7-NEXT: vspltb v2, v2, 15 -; CHECK-P7-NEXT: stvx v2, 0, r3 +; CHECK-P7-NEXT: xxswapd vs0, v2 +; CHECK-P7-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P7-NEXT: blr ; ; P8-AIX-LABEL: testByteSplat: diff --git a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll --- a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll +++ b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll @@ -12,9 +12,11 @@ ; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha ; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lvx 2, 0, 3 -; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI0_1@toc@l -; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI0_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4 +; CHECK-LE-P8-NEXT: xxswapd 34, 0 +; CHECK-LE-P8-NEXT: xxswapd 35, 1 ; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 ; CHECK-LE-P8-NEXT: blr ; @@ -52,9 +54,11 @@ ; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha ; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK-LE-P8-NEXT: lvx 2, 0, 3 -; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI1_1@toc@l -; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI1_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4 +; CHECK-LE-P8-NEXT: xxswapd 34, 0 +; CHECK-LE-P8-NEXT: xxswapd 35, 1 ; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 ; CHECK-LE-P8-NEXT: blr ; @@ -91,9 +95,11 @@ ; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha ; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha ; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lvx 
2, 0, 3 -; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI2_1@toc@l -; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI2_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4 +; CHECK-LE-P8-NEXT: xxswapd 34, 0 +; CHECK-LE-P8-NEXT: xxswapd 35, 1 ; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 ; CHECK-LE-P8-NEXT: blr ; @@ -130,9 +136,11 @@ ; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha ; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha ; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l -; CHECK-LE-P8-NEXT: lvx 2, 0, 3 -; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI3_1@toc@l -; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: addi 4, 4, .LCPI3_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-LE-P8-NEXT: lxvd2x 1, 0, 4 +; CHECK-LE-P8-NEXT: xxswapd 34, 0 +; CHECK-LE-P8-NEXT: xxswapd 35, 1 ; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 ; CHECK-LE-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -573,8 +573,9 @@ ; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 ; CHECK-NEXT: li r4, 20 ; CHECK-NEXT: addi r3, r3, .LCPI16_0@toc@l -; CHECK-NEXT: lvx v3, 0, r3 +; CHECK-NEXT: lxvd2x vs0, 0, r3 ; CHECK-NEXT: li r3, 16 +; CHECK-NEXT: xxswapd vs35, vs0 ; CHECK-NEXT: vperm v3, v2, v2, v3 ; CHECK-NEXT: xxswapd vs0, vs35 ; CHECK-NEXT: stxvd2x vs0, 0, r5 diff --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll --- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll +++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll @@ -31,7 +31,8 @@ ; CHECK-P8-LABEL: testArray_01: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addi r3, r3, 32 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -61,7 +62,8 @@ ; CHECK-P8-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-P8-NEXT: ld r3, .LC0@toc@l(r3) ; 
CHECK-P8-NEXT: addi r3, r3, 32 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -148,11 +150,12 @@ ; CHECK-P8-NEXT: std r4, 40(r1) ; CHECK-P8-NEXT: std r5, 48(r1) ; CHECK-P8-NEXT: std r6, 56(r1) -; CHECK-P8-NEXT: lvx v2, r12, r11 +; CHECK-P8-NEXT: lxvd2x vs0, r12, r11 ; CHECK-P8-NEXT: std r7, 64(r1) ; CHECK-P8-NEXT: std r8, 72(r1) ; CHECK-P8-NEXT: std r9, 80(r1) ; CHECK-P8-NEXT: std r10, 88(r1) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -288,7 +291,8 @@ ; CHECK-P8-NEXT: addi r3, r1, -16 ; CHECK-P8-NEXT: std r8, -8(r1) ; CHECK-P8-NEXT: std r7, -16(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -314,7 +318,8 @@ ; CHECK-P8-NEXT: addi r3, r1, -16 ; CHECK-P8-NEXT: std r6, -8(r1) ; CHECK-P8-NEXT: std r5, -16(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -364,7 +369,8 @@ ; CHECK-P8-NEXT: addi r4, r1, 48 ; CHECK-P8-NEXT: std r6, 56(r1) ; CHECK-P8-NEXT: std r5, 48(r1) -; CHECK-P8-NEXT: lvx v31, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 @@ -438,11 +444,12 @@ ; CHECK-P8-NEXT: std r9, 80(r1) ; CHECK-P8-NEXT: std r10, 88(r1) ; CHECK-P8-NEXT: addi r7, r1, 32 -; CHECK-P8-NEXT: lvx v2, r7, r11 +; CHECK-P8-NEXT: lxvd2x vs0, r7, r11 ; CHECK-P8-NEXT: std r3, 32(r1) ; CHECK-P8-NEXT: std r4, 40(r1) ; CHECK-P8-NEXT: std r5, 48(r1) ; CHECK-P8-NEXT: std r6, 56(r1) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -468,7 +475,8 @@ ; CHECK-P8-NEXT: addi r5, r1, -16 ; CHECK-P8-NEXT: std r4, -8(r1) ; CHECK-P8-NEXT: std r3, -16(r1) -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -494,7 +502,8 @@ ; 
CHECK-P8-NEXT: addi r5, r1, -16 ; CHECK-P8-NEXT: std r4, -8(r1) ; CHECK-P8-NEXT: std r3, -16(r1) -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -520,7 +529,8 @@ ; CHECK-P8-NEXT: addi r3, r1, -16 ; CHECK-P8-NEXT: std r8, -8(r1) ; CHECK-P8-NEXT: std r7, -16(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: @@ -594,14 +604,15 @@ ; CHECK-P8-NEXT: std r5, 112(r1) ; CHECK-P8-NEXT: std r6, 120(r1) ; CHECK-P8-NEXT: std r7, 128(r1) -; CHECK-P8-NEXT: addi r3, r11, .LCPI17_0@toc@l +; CHECK-P8-NEXT: addi r11, r11, .LCPI17_0@toc@l +; CHECK-P8-NEXT: lxvd2x vs0, 0, r11 ; CHECK-P8-NEXT: std r8, 136(r1) ; CHECK-P8-NEXT: std r9, 144(r1) ; CHECK-P8-NEXT: std r10, 152(r1) +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: blt cr0, .LBB17_2 ; CHECK-P8-NEXT: # %bb.1: # %if.end ; CHECK-P8-NEXT: addi r30, r1, 104 -; CHECK-P8-NEXT: lvx v3, 0, r3 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __addkf3 @@ -613,10 +624,9 @@ ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: b .LBB17_3 -; CHECK-P8-NEXT: .LBB17_2: -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: .LBB17_3: # %cleanup +; CHECK-P8-NEXT: vmr v3, v2 +; CHECK-P8-NEXT: .LBB17_2: # %cleanup +; CHECK-P8-NEXT: vmr v2, v3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -23,12 +23,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: 
xxswapd v2, vs0 ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -59,12 +61,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -95,12 +99,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -131,12 +137,14 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: bl __divkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; 
CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -161,9 +169,9 @@ ; CHECK-P8-LABEL: testLdNSt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addi r3, r3, 4 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r3, r4, 8 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4 @@ -193,11 +201,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl sqrtf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -223,18 +233,18 @@ ; ; CHECK-P8-LABEL: qpCpsgn: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addi r3, r1, -16 -; CHECK-P8-NEXT: addi r4, r1, -32 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r4 -; CHECK-P8-NEXT: lbz r3, -1(r1) +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addi r4, r1, -16 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: addi r3, r1, -32 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lbz r4, -1(r1) ; CHECK-P8-NEXT: lbz r6, -17(r1) -; CHECK-P8-NEXT: rlwimi r6, r3, 0, 0, 24 +; CHECK-P8-NEXT: rlwimi r6, r4, 0, 0, 24 ; CHECK-P8-NEXT: stb r6, -17(r1) -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: stvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: blr fp128* nocapture %res) { entry: @@ -257,14 +267,14 @@ ; ; CHECK-P8-LABEL: qpAbs: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; 
CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r3, r1, -16 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r5, -1(r1) ; CHECK-P8-NEXT: clrlwi r5, r5, 25 ; CHECK-P8-NEXT: stb r5, -1(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 @@ -285,20 +295,20 @@ ; ; CHECK-P8-LABEL: qpNAbs: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r3, r1, -32 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r5, -17(r1) ; CHECK-P8-NEXT: clrlwi r5, r5, 25 ; CHECK-P8-NEXT: stb r5, -17(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r3, r1, -16 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r5, -1(r1) ; CHECK-P8-NEXT: xori r5, r5, 128 ; CHECK-P8-NEXT: stb r5, -1(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 @@ -319,14 +329,14 @@ ; ; CHECK-P8-LABEL: qpNeg: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r3, r1, -16 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r5, -1(r1) ; CHECK-P8-NEXT: xori r5, r5, 128 ; CHECK-P8-NEXT: stb r5, -1(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: blr entry: %0 = load fp128, fp128* %a, align 16 @@ -359,7 +369,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: 
lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl sinf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -396,7 +407,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl cosf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -433,7 +445,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl logf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -470,7 +483,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl log10f128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -507,7 +521,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl log2f128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -545,8 +560,10 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl fminf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -586,8 +603,10 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 
0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl fmaxf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -627,8 +646,10 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl powf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -667,7 +688,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl expf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -704,7 +726,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl exp2f128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -749,13 +772,15 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lwz r3, 0(r4) ; CHECK-P8-NEXT: mr r30, r5 ; CHECK-P8-NEXT: mr r5, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __powikf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -806,8 +831,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a@toc@l ; CHECK-P8-NEXT: 
addi r4, r4, b@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl fmodf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -838,11 +865,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl ceilf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -873,11 +902,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl floorf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -908,11 +939,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl truncf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -943,11 +976,13 @@ ; CHECK-P8-NEXT: 
std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl roundf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -991,8 +1026,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl lroundf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -1039,8 +1075,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl llroundf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -1074,11 +1111,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl rintf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1122,8 +1161,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; 
CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl lrintf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -1170,8 +1210,9 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl llrintf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -1205,11 +1246,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl nearbyintf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1242,13 +1285,17 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v4, vs2 ; CHECK-P8-NEXT: bl fmaf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/f128-compare.ll b/llvm/test/CodeGen/PowerPC/f128-compare.ll --- a/llvm/test/CodeGen/PowerPC/f128-compare.ll +++ 
b/llvm/test/CodeGen/PowerPC/f128-compare.ll @@ -35,8 +35,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __gtkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -81,8 +83,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __ltkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31 @@ -125,8 +129,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __gekf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31 @@ -170,8 +176,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __lekf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -217,8 +225,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; 
CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __eqkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: cntlzw r3, r3 @@ -261,8 +271,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __gtkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -308,8 +320,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __ltkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31 @@ -354,8 +368,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __gekf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: rlwinm r3, r3, 1, 31, 31 @@ -399,8 +415,10 @@ ; CHECK-P8-NEXT: addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __lekf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -445,8 +463,10 @@ ; CHECK-P8-NEXT: 
addis r4, r2, b_qp@toc@ha ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __nekf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: cntlzw r3, r3 @@ -496,9 +516,11 @@ ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha -; CHECK-P8-NEXT: lvx v30, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v30, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __gtkf2 @@ -557,9 +579,11 @@ ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha -; CHECK-P8-NEXT: lvx v30, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v30, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __ltkf2 @@ -619,9 +643,11 @@ ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha -; CHECK-P8-NEXT: lvx v30, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v30, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __gekf2 @@ -681,9 +707,11 @@ ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded 
Spill ; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha -; CHECK-P8-NEXT: lvx v30, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v30, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __lekf2 @@ -742,9 +770,11 @@ ; CHECK-P8-NEXT: addi r4, r4, b_qp@toc@l ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: addis r3, r2, a_qp@toc@ha -; CHECK-P8-NEXT: lvx v30, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, a_qp@toc@l -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v30, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __eqkf2 diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll --- a/llvm/test/CodeGen/PowerPC/f128-conv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll @@ -35,7 +35,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -85,7 +86,8 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floattikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -125,7 +127,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: 
ld r30, -16(r1) # 8-byte Folded Reload @@ -163,7 +166,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -206,7 +210,8 @@ ; CHECK-P8-NEXT: iselgt r3, r4, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -241,7 +246,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -291,7 +297,8 @@ ; CHECK-P8-NEXT: mr r4, r5 ; CHECK-P8-NEXT: bl __floatuntikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -331,7 +338,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -369,7 +377,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload @@ -406,7 +415,8 @@ ; CHECK-P8-NEXT: clrldi r3, r4, 63 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -446,8 +456,9 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -491,8 +502,9 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mr r3, r30 -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -531,7 +543,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -567,7 +580,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -609,7 +623,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld 
r30, -16(r1) # 8-byte Folded Reload @@ -646,7 +661,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -682,7 +698,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -724,7 +741,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -765,7 +783,8 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -803,7 +822,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -840,7 +860,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload @@ -882,7 +903,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -923,7 +945,8 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -963,7 +986,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -999,7 +1023,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1041,7 +1066,8 @@ ; CHECK-P8-NEXT: mr r3, r4 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1082,7 +1108,8 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 
@@ -1125,7 +1152,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1161,7 +1189,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfd f1, 0(r30) @@ -1204,7 +1233,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC7@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __trunckfdf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 3 @@ -1244,9 +1274,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfdf2 @@ -1284,7 +1316,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1321,7 +1354,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC6@toc@ha ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC6@toc@l(r4) -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl 
__trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stfs f1, 0(r30) @@ -1366,7 +1400,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC7@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 48 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __trunckfsf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r3, r30, 2 @@ -1407,9 +1442,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __trunckfsf2 @@ -1479,8 +1516,9 @@ ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1516,8 +1554,9 @@ ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1556,8 +1595,9 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extenddfkf2 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: sldi r3, r30, 4 -; CHECK-P8-NEXT: stvx v2, r29, r3 +; CHECK-P8-NEXT: stxvd2x vs0, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1593,7 +1633,8 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extenddfkf2 ; 
CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1653,8 +1694,9 @@ ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1690,8 +1732,9 @@ ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addis r3, r2, .LC8@toc@ha +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: ld r3, .LC8@toc@l(r3) -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -1730,8 +1773,9 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: sldi r3, r30, 4 -; CHECK-P8-NEXT: stvx v2, r29, r3 +; CHECK-P8-NEXT: stxvd2x vs0, r29, r3 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1767,7 +1811,8 @@ ; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: bl __extendsfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1805,7 +1850,8 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1841,7 +1887,8 @@ 
; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1879,7 +1926,8 @@ ; CHECK-P8-NEXT: extsw r3, r3 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1915,7 +1963,8 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatdikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1953,7 +2002,8 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -1989,7 +2039,8 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2027,7 +2078,8 @@ ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: bl __floatunsikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2063,7 
+2115,8 @@ ; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: bl __floatundikf ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -2100,7 +2153,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -2137,7 +2191,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixunskfti ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -2166,7 +2221,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -2195,7 +2251,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll --- a/llvm/test/CodeGen/PowerPC/f128-fma.ll +++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll @@ -24,22 +24,26 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: 
li r7, 48 -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v31, vs2 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: li r3, 48 -; CHECK-P8-NEXT: stvx v2, 0, r30 -; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -75,23 +79,27 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: li r3, 48 -; CHECK-P8-NEXT: stvx v2, 0, r30 -; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: addi r1, 
r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -130,16 +138,20 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 ; CHECK-P8-NEXT: mr r29, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: lvx v3, 0, r29 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r29 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -178,28 +190,32 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: li r7, 64 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 48 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 63(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 63(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r3, 64 +; 
CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload -; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -240,22 +256,26 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -80(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 ; CHECK-P8-NEXT: mr r29, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: lvx v3, 0, r29 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r29 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 32 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 47(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 47(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -295,23 +315,27 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __mulkf3 ; 
CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: li r3, 48 -; CHECK-P8-NEXT: stvx v2, 0, r30 -; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -350,16 +374,20 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -64(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 ; CHECK-P8-NEXT: mr r29, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: lvx v3, 0, r29 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r29 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -399,28 +427,32 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: li r7, 64 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: lvx v3, 0, r5 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r6 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __mulkf3 ; 
CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: vmr v2, v31 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 48 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 63(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 63(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r3, 64 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload -; CHECK-P8-NEXT: stvx v2, 0, r30 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: addi r1, r1, 96 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -461,22 +493,26 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -80(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r6 ; CHECK-P8-NEXT: mr r29, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __mulkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: lvx v3, 0, r29 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r29 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 32 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 47(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 47(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 80 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll --- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll +++ 
b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll @@ -18,7 +18,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: blr entry: ret fp128 0xL00000000000000004001400000000000 @@ -46,7 +47,8 @@ ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -134,10 +136,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: sldi r4, r4, 4 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: add r4, r3, r4 ; CHECK-P8-NEXT: addi r4, r4, -16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -197,40 +201,38 @@ ; CHECK-P8-NEXT: li r3, 48 ; CHECK-P8-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 64 -; CHECK-P8-NEXT: vmr v21, v4 ; CHECK-P8-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 80 -; CHECK-P8-NEXT: vmr v22, v5 +; CHECK-P8-NEXT: vmr v22, v4 ; CHECK-P8-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 96 -; CHECK-P8-NEXT: vmr v23, v6 +; CHECK-P8-NEXT: vmr v23, v5 ; CHECK-P8-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 112 -; CHECK-P8-NEXT: vmr v24, v7 +; CHECK-P8-NEXT: vmr v24, v6 ; CHECK-P8-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 128 -; CHECK-P8-NEXT: vmr v25, v8 +; CHECK-P8-NEXT: vmr v25, v7 ; CHECK-P8-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 144 -; 
CHECK-P8-NEXT: vmr v26, v9 +; CHECK-P8-NEXT: vmr v26, v8 ; CHECK-P8-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 160 -; CHECK-P8-NEXT: vmr v27, v10 +; CHECK-P8-NEXT: vmr v27, v9 ; CHECK-P8-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 176 -; CHECK-P8-NEXT: vmr v28, v11 +; CHECK-P8-NEXT: vmr v28, v10 ; CHECK-P8-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 192 +; CHECK-P8-NEXT: vmr v29, v11 ; CHECK-P8-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: li r3, 208 ; CHECK-P8-NEXT: vmr v30, v12 ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: addi r3, r1, 448 ; CHECK-P8-NEXT: vmr v31, v13 -; CHECK-P8-NEXT: lvx v29, 0, r3 -; CHECK-P8-NEXT: bl __addkf3 -; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: vmr v3, v21 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v21, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v22 @@ -254,13 +256,16 @@ ; CHECK-P8-NEXT: vmr v3, v28 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: vmr v3, v29 +; CHECK-P8-NEXT: bl __addkf3 +; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v30 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v31 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: vmr v3, v29 +; CHECK-P8-NEXT: vmr v3, v21 ; CHECK-P8-NEXT: bl __subkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: li r3, 208 @@ -429,7 +434,7 @@ ; CHECK-P8-NEXT: .cfi_offset v31, -32 ; CHECK-P8-NEXT: li r3, 48 ; CHECK-P8-NEXT: add r4, r7, r9 -; CHECK-P8-NEXT: vmr v4, v2 +; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: lwz r3, 176(r1) @@ -438,9 +443,9 @@ ; CHECK-P8-NEXT: add r3, r4, r3 ; CHECK-P8-NEXT: clrldi r3, r3, 32 ; CHECK-P8-NEXT: std r3, 0(r6) -; CHECK-P8-NEXT: lvx v3, 0, r8 -; CHECK-P8-NEXT: vmr v2, v3 -; CHECK-P8-NEXT: vmr v3, v4 +; CHECK-P8-NEXT: lxvd2x 
vs0, 0, r8 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxlor v2, vs0, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: fmr f1, f31 @@ -501,7 +506,7 @@ ; CHECK-P8-NEXT: .cfi_offset f31, -8 ; CHECK-P8-NEXT: .cfi_offset v31, -32 ; CHECK-P8-NEXT: add r4, r4, r6 -; CHECK-P8-NEXT: vmr v4, v2 +; CHECK-P8-NEXT: vmr v3, v2 ; CHECK-P8-NEXT: li r9, 48 ; CHECK-P8-NEXT: stfd f31, 72(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: add r4, r4, r7 @@ -510,9 +515,9 @@ ; CHECK-P8-NEXT: add r4, r4, r8 ; CHECK-P8-NEXT: clrldi r4, r4, 32 ; CHECK-P8-NEXT: std r4, 0(r3) -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: vmr v2, v3 -; CHECK-P8-NEXT: vmr v3, v4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxlor v2, vs0, vs0 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: fmr f1, f31 @@ -572,15 +577,18 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 ; CHECK-P8-NEXT: ld r4, 184(r1) ; CHECK-P8-NEXT: li r3, 48 -; CHECK-P8-NEXT: stvx v2, 0, r9 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r5 ; CHECK-P8-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill ; CHECK-P8-NEXT: mr r3, r10 -; CHECK-P8-NEXT: stvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v31, 0, r9 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r9 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r9 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 @@ -633,14 +641,17 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: .cfi_offset r30, -16 ; CHECK-P8-NEXT: .cfi_offset v31, -32 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: stvx v2, 0, r4 -; CHECK-P8-NEXT: stvx v3, 0, r7 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: mr r30, r3 ; 
CHECK-P8-NEXT: mr r3, r5 ; CHECK-P8-NEXT: stvx v31, r1, r6 # 16-byte Folded Spill -; CHECK-P8-NEXT: lvx v31, 0, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r7 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v31, vs0 ; CHECK-P8-NEXT: bl __floatsikf ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: vmr v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll b/llvm/test/CodeGen/PowerPC/f128-rounding.ll --- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll +++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll @@ -22,11 +22,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl truncf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -57,11 +59,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl rintf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -92,11 +96,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl nearbyintf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 
+; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -127,11 +133,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl roundf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -162,11 +170,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl floorf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -197,11 +207,13 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: mr r30, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl ceilf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll --- 
a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll @@ -28,7 +28,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -66,7 +67,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -106,10 +108,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfdi @@ -149,9 +153,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfdi @@ -201,7 +207,8 @@ ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r4, r30, 3 @@ -240,7 +247,8 @@ 
; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -278,7 +286,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: std r3, 0(r30) @@ -318,10 +327,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfdi @@ -361,9 +372,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfdi @@ -412,7 +425,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha ; CHECK-P8-NEXT: mr r29, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixunskfdi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sldi r4, r30, 3 @@ -452,7 +466,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: 
.cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -491,7 +506,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -532,10 +548,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -576,9 +594,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -617,7 +637,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixunskfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -655,7 +676,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 
; CHECK-P8-NEXT: bl __fixunskfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stw r3, 0(r30) @@ -696,10 +718,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfsi @@ -739,9 +763,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixunskfsi @@ -781,7 +807,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -819,7 +846,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sth r3, 0(r30) @@ -859,10 +887,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx 
v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -902,9 +932,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -942,7 +974,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -979,7 +1012,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: sth r3, 0(r30) @@ -1019,10 +1053,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1061,9 +1097,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: 
lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1101,7 +1139,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: extsw r3, r3 @@ -1139,7 +1178,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stb r3, 0(r30) @@ -1179,10 +1219,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1222,9 +1264,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1262,7 +1306,8 @@ ; CHECK-P8-NEXT: stdu r1, -32(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd 
v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 @@ -1299,7 +1344,8 @@ ; CHECK-P8-NEXT: mr r30, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) ; CHECK-P8-NEXT: addi r4, r4, 32 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl __fixkfsi ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: stb r3, 0(r30) @@ -1339,10 +1385,12 @@ ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1381,9 +1429,11 @@ ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P8-NEXT: std r0, 16(r1) ; CHECK-P8-NEXT: stdu r1, -48(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: bl __addkf3 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: bl __fixkfsi @@ -1482,7 +1532,8 @@ ; CHECK-P8-NEXT: mr r30, r5 ; CHECK-P8-NEXT: bl __trunctfkf2 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll --- a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll +++ b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll @@ -55,7 +55,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-NEXT: addi 
r3, r3, .LCPI4_0@toc@l -; CHECK-NEXT: lvx v3, 0, r3 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd vs35, vs0 ; CHECK-NEXT: xxland vs34, vs34, vs35 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll @@ -157,12 +157,14 @@ ; CHECK-P8-NEXT: stdu r1, -48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: xxswapd vs0, v4 ; CHECK-P8-NEXT: addi r3, r1, 32 -; CHECK-P8-NEXT: stvx v4, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 47(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 47(r1) -; CHECK-P8-NEXT: lvx v4, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v4, vs0 ; CHECK-P8-NEXT: bl fmaf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 48 @@ -193,12 +195,14 @@ ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: bl fmaf128 ; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 32 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 47(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 47(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r1, r1, 48 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 @@ -225,20 +229,24 @@ ; CHECK-P8-NEXT: stdu r1, -64(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 ; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: xxswapd vs0, v4 ; CHECK-P8-NEXT: addi r3, r1, 32 -; CHECK-P8-NEXT: stvx v4, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 47(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 47(r1) -; CHECK-P8-NEXT: lvx v4, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v4, vs0 ; CHECK-P8-NEXT: bl fmaf128 ; CHECK-P8-NEXT: nop +; 
CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: addi r3, r1, 48 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: lbz r4, 63(r1) ; CHECK-P8-NEXT: xori r4, r4, 128 ; CHECK-P8-NEXT: stb r4, 63(r1) -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) ; CHECK-P8-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll --- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll @@ -23,14 +23,17 @@ ; LE-NEXT: ld 3, -8(1) ; LE-NEXT: addi 3, 3, 15 ; LE-NEXT: rldicr 3, 3, 0, 59 -; LE-NEXT: addi 4, 3, 31 -; LE-NEXT: addi 5, 3, 16 +; LE-NEXT: addi 4, 3, 16 +; LE-NEXT: std 4, -8(1) +; LE-NEXT: ld 4, -8(1) +; LE-NEXT: lxvd2x 0, 0, 3 +; LE-NEXT: addi 4, 4, 15 ; LE-NEXT: rldicr 4, 4, 0, 59 -; LE-NEXT: std 5, -8(1) +; LE-NEXT: xxswapd 34, 0 ; LE-NEXT: addi 5, 4, 16 -; LE-NEXT: lvx 2, 0, 3 ; LE-NEXT: std 5, -8(1) -; LE-NEXT: lvx 3, 0, 4 +; LE-NEXT: lxvd2x 1, 0, 4 +; LE-NEXT: xxswapd 35, 1 ; LE-NEXT: blr %args = alloca i8*, align 4 %x = va_arg i8** %args, <8 x i32> diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -83,8 +83,9 @@ ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxspltw v2, vs0, 1 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxspltw vs0, vs0, 1 +; P8-NEXT: xxswapd vs0, vs0 +; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr ; ; P7-LABEL: test2: @@ -139,8 +140,9 @@ ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxspltw v2, vs0, 1 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxspltw vs0, vs0, 1 +; P8-NEXT: xxswapd vs0, vs0 +; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr ; ; P7-LABEL: test3: @@ -428,7 +430,8 
@@ ; P8-NEXT: lhzx r4, 0, r4 ; P8-NEXT: mtvsrwz v2, r4 ; P8-NEXT: vsplth v2, v2, 3 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxswapd vs0, v2 +; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr ; ; P7-LABEL: test7: @@ -489,7 +492,8 @@ ; P8-NEXT: lbzx r4, 0, r4 ; P8-NEXT: mtvsrwz v2, r4 ; P8-NEXT: vspltb v2, v2, 7 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxswapd vs0, v2 +; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr ; ; P7-LABEL: test8: @@ -629,7 +633,8 @@ ; ; P8-LABEL: unadjusted_lxvwsx_v16i8: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 +; P8-NEXT: lxvd2x vs0, 0, r3 +; P8-NEXT: xxswapd v2, vs0 ; P8-NEXT: xxspltw v2, v2, 3 ; P8-NEXT: blr ; @@ -670,7 +675,8 @@ ; ; P8-LABEL: adjusted_lxvwsx_v16i8: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 +; P8-NEXT: lxvd2x vs0, 0, r3 +; P8-NEXT: xxswapd v2, vs0 ; P8-NEXT: xxspltw v2, v2, 2 ; P8-NEXT: blr ; @@ -712,7 +718,8 @@ ; ; P8-LABEL: adjusted_lxvwsx_v16i8_2: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 +; P8-NEXT: lxvd2x vs0, 0, r3 +; P8-NEXT: xxswapd v2, vs0 ; P8-NEXT: xxspltw v2, v2, 1 ; P8-NEXT: blr ; @@ -754,7 +761,8 @@ ; ; P8-LABEL: adjusted_lxvwsx_v16i8_3: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 +; P8-NEXT: lxvd2x vs0, 0, r3 +; P8-NEXT: xxswapd v2, vs0 ; P8-NEXT: xxspltw v2, v2, 0 ; P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll --- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll +++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll @@ -71,10 +71,12 @@ ; CHECK-P8-LABEL: load_swap10: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; 
CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap10: @@ -109,10 +111,12 @@ ; CHECK-P8-LABEL: load_swap11: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap11: @@ -147,10 +151,12 @@ ; CHECK-P8-LABEL: load_swap20: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap20: @@ -185,10 +191,12 @@ ; CHECK-P8-LABEL: load_swap21: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap21: @@ -223,10 +231,12 @@ ; CHECK-P8-LABEL: load_swap30: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, 
v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap30: @@ -258,10 +268,12 @@ ; CHECK-P8-LABEL: load_swap31: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap31: @@ -321,10 +333,12 @@ ; CHECK-P8-LABEL: load_swap50: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap50: @@ -359,10 +373,12 @@ ; CHECK-P8-LABEL: load_swap51: ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap51: @@ -452,9 +468,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: 
swap_store10: @@ -489,9 +507,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store11: @@ -526,9 +546,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store20: @@ -563,9 +585,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI16_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI16_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store21: @@ -600,9 +624,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store30: @@ -634,9 +660,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI18_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI18_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; 
CHECK-P8-NEXT: vperm v2, v3, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store31: @@ -722,9 +750,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store50: @@ -759,9 +789,11 @@ ; CHECK-P8: # %bb.0: ; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 -; CHECK-P8-NEXT: stvx v2, 0, r7 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store51: diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -300,8 +300,8 @@ ; LE-PWR8-LABEL: testUnalignedLdSt: ; LE-PWR8: # %bb.0: # %entry ; LE-PWR8-NEXT: addis r3, r2, f@toc@ha -; LE-PWR8-NEXT: li r4, 59 -; LE-PWR8-NEXT: li r5, 43 +; LE-PWR8-NEXT: li r4, 43 +; LE-PWR8-NEXT: li r5, 59 ; LE-PWR8-NEXT: addi r3, r3, f@toc@l ; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 ; LE-PWR8-NEXT: li r4, 11 @@ -309,8 +309,8 @@ ; LE-PWR8-NEXT: li r5, 27 ; LE-PWR8-NEXT: lxvd2x vs2, r3, r4 ; LE-PWR8-NEXT: lxvd2x vs3, r3, r5 -; LE-PWR8-NEXT: li r4, 51 -; LE-PWR8-NEXT: li r5, 67 +; LE-PWR8-NEXT: li r4, 67 +; LE-PWR8-NEXT: li r5, 51 ; LE-PWR8-NEXT: stxvd2x vs1, r3, r4 ; LE-PWR8-NEXT: li r4, 35 ; LE-PWR8-NEXT: stxvd2x vs0, r3, r5 @@ -576,17 +576,15 @@ ; LE-PWR8-LABEL: testUnalignedLdStPair: ; LE-PWR8: # 
%bb.0: # %entry ; LE-PWR8-NEXT: addis r3, r2, g@toc@ha -; LE-PWR8-NEXT: li r4, 27 -; LE-PWR8-NEXT: li r5, 11 -; LE-PWR8-NEXT: li r6, 19 -; LE-PWR8-NEXT: li r8, 35 +; LE-PWR8-NEXT: li r4, 11 +; LE-PWR8-NEXT: li r5, 27 ; LE-PWR8-NEXT: addi r3, r3, g@toc@l ; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 -; LE-PWR8-NEXT: ldx r5, r3, r5 -; LE-PWR8-NEXT: ldx r7, r3, r6 -; LE-PWR8-NEXT: stdx r7, r3, r4 -; LE-PWR8-NEXT: stdx r5, r3, r6 -; LE-PWR8-NEXT: stxvd2x vs0, r3, r8 +; LE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; LE-PWR8-NEXT: li r4, 35 +; LE-PWR8-NEXT: li r5, 19 +; LE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; LE-PWR8-NEXT: stxvd2x vs0, r3, r5 ; LE-PWR8-NEXT: blr ; ; BE-PWR9-LABEL: testUnalignedLdStPair: diff --git a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll --- a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll +++ b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll @@ -30,7 +30,8 @@ ; CHECK-NEXT: xvcvdpsp 35, 1 ; CHECK-NEXT: vmrgew 2, 2, 3 ; CHECK-NEXT: .loc 1 3 9 is_stmt 0 -; CHECK-NEXT: stvx 2, 0, 4 +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: stxvd2x 0, 0, 4 ; CHECK-NEXT: .loc 1 4 1 is_stmt 1 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll --- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll @@ -1733,10 +1733,10 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob11PtrPlus0: @@ -1788,10 +1788,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha ; CHECK-P8-LE-NEXT: li r4, 3 ; CHECK-P8-LE-NEXT: addi r3, r3, 
GlobLd11@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob11PtrPlus3: @@ -1844,10 +1844,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha ; CHECK-P8-LE-NEXT: li r4, 4 ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob11PtrPlus4: @@ -1899,10 +1899,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd11@toc@ha ; CHECK-P8-LE-NEXT: li r4, 16 ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd11@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt11@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt11@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob11PtrPlus16: @@ -1959,10 +1959,10 @@ ; CHECK-P8-LE-NEXT: addis r4, r2, GlobLd11@toc@ha ; CHECK-P8-LE-NEXT: sldi r3, r3, 4 ; CHECK-P8-LE-NEXT: addi r4, r4, GlobLd11@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r4, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r4, r3 ; CHECK-P8-LE-NEXT: addis r4, r2, GlobSt11@toc@ha ; CHECK-P8-LE-NEXT: addi r4, r4, GlobSt11@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r4, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r4, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob11PtrPlusVar: @@ -2015,10 +2015,10 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 ; 
CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob12PtrPlus0: @@ -2070,10 +2070,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha ; CHECK-P8-LE-NEXT: li r4, 3 ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob12PtrPlus3: @@ -2126,10 +2126,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha ; CHECK-P8-LE-NEXT: li r4, 4 ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob12PtrPlus4: @@ -2181,10 +2181,10 @@ ; CHECK-P8-LE-NEXT: addis r3, r2, GlobLd12@toc@ha ; CHECK-P8-LE-NEXT: li r4, 16 ; CHECK-P8-LE-NEXT: addi r3, r3, GlobLd12@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: addis r3, r2, GlobSt12@toc@ha ; CHECK-P8-LE-NEXT: addi r3, r3, GlobSt12@toc@l -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob12PtrPlus16: @@ -2241,10 +2241,10 @@ ; CHECK-P8-LE-NEXT: addis r4, r2, GlobLd12@toc@ha ; CHECK-P8-LE-NEXT: sldi r3, r3, 4 ; CHECK-P8-LE-NEXT: addi r4, r4, GlobLd12@toc@l -; CHECK-P8-LE-NEXT: lvx v2, r4, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r4, r3 ; CHECK-P8-LE-NEXT: addis r4, r2, GlobSt12@toc@ha ; CHECK-P8-LE-NEXT: addi r4, r4, GlobSt12@toc@l -; 
CHECK-P8-LE-NEXT: stvx v2, r4, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r4, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: testGlob12PtrPlusVar: diff --git a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -217,7 +217,8 @@ ret <1 x i128> %ret ; CHECK-LE-LABEL: @call_v1i128_increment_by_one -; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: xxswapd 34, [[VAL]] ; CHECK-LE: bl v1i128_increment_by_one ; CHECK-LE: blr @@ -246,8 +247,10 @@ ret <1 x i128> %ret ; CHECK-LE-LABEL: @call_v1i128_increment_by_val -; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-LE-DAG: xxswapd 34, [[VAL1]] +; CHECK-LE: xxswapd 35, [[VAL2]] ; CHECK-LE: bl v1i128_increment_by_val ; CHECK-LE: blr diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -8,7 +8,8 @@ ; LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; LE-NEXT: xxlxor 37, 37, 37 ; LE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; LE-NEXT: lvx 4, 0, 3 +; LE-NEXT: lxvd2x 0, 0, 3 +; LE-NEXT: xxswapd 36, 0 ; LE-NEXT: xxland 34, 34, 36 ; LE-NEXT: xxland 35, 35, 36 ; LE-NEXT: vcmpequw 2, 2, 5 @@ -40,11 +41,12 @@ ; LE-NEXT: vmrghh 4, 1, 4 ; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l ; LE-NEXT: vmrghh 3, 3, 6 +; LE-NEXT: lxvd2x 2, 0, 3 ; LE-NEXT: vmrghh 5, 0, 5 ; LE-NEXT: xxmrglw 0, 36, 34 ; LE-NEXT: vspltish 4, 15 ; LE-NEXT: xxmrglw 1, 37, 35 -; LE-NEXT: lvx 3, 0, 3 +; LE-NEXT: xxswapd 35, 2 ; LE-NEXT: xxmrgld 34, 1, 0 ; LE-NEXT: xxlor 34, 34, 35 ; LE-NEXT: vslh 2, 2, 4 diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- 
a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -487,13 +487,15 @@ ; CHECK-P8-NEXT: addis 3, 2, .LCPI12_0@toc@ha ; CHECK-P8-NEXT: addis 4, 2, .LCPI12_1@toc@ha ; CHECK-P8-NEXT: addi 3, 3, .LCPI12_0@toc@l -; CHECK-P8-NEXT: xvmulsp 1, 35, 0 -; CHECK-P8-NEXT: lvx 3, 0, 3 +; CHECK-P8-NEXT: lxvd2x 1, 0, 3 ; CHECK-P8-NEXT: addi 3, 4, .LCPI12_1@toc@l -; CHECK-P8-NEXT: lvx 4, 0, 3 -; CHECK-P8-NEXT: xvmaddasp 35, 1, 0 -; CHECK-P8-NEXT: xvmulsp 0, 0, 36 +; CHECK-P8-NEXT: lxvd2x 3, 0, 3 +; CHECK-P8-NEXT: xxswapd 1, 1 +; CHECK-P8-NEXT: xvmulsp 2, 35, 0 +; CHECK-P8-NEXT: xxswapd 35, 3 +; CHECK-P8-NEXT: xvmaddasp 1, 2, 0 ; CHECK-P8-NEXT: xvmulsp 0, 0, 35 +; CHECK-P8-NEXT: xvmulsp 0, 0, 1 ; CHECK-P8-NEXT: xvmulsp 34, 34, 0 ; CHECK-P8-NEXT: blr ; @@ -1044,13 +1046,15 @@ ; CHECK-P8-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; CHECK-P8-NEXT: addis 4, 2, .LCPI25_1@toc@ha ; CHECK-P8-NEXT: addi 3, 3, .LCPI25_0@toc@l -; CHECK-P8-NEXT: xvmulsp 1, 34, 0 -; CHECK-P8-NEXT: lvx 2, 0, 3 +; CHECK-P8-NEXT: lxvd2x 1, 0, 3 ; CHECK-P8-NEXT: addi 3, 4, .LCPI25_1@toc@l -; CHECK-P8-NEXT: lvx 3, 0, 3 -; CHECK-P8-NEXT: xvmaddasp 34, 1, 0 -; CHECK-P8-NEXT: xvmulsp 0, 1, 35 -; CHECK-P8-NEXT: xvmulsp 34, 0, 34 +; CHECK-P8-NEXT: lxvd2x 3, 0, 3 +; CHECK-P8-NEXT: xxswapd 1, 1 +; CHECK-P8-NEXT: xvmulsp 2, 34, 0 +; CHECK-P8-NEXT: xxswapd 34, 3 +; CHECK-P8-NEXT: xvmaddasp 1, 2, 0 +; CHECK-P8-NEXT: xvmulsp 0, 2, 34 +; CHECK-P8-NEXT: xvmulsp 34, 0, 1 ; CHECK-P8-NEXT: blr ; CHECK-P8-NEXT: .LBB25_2: ; CHECK-P8-NEXT: xvsqrtsp 34, 34 diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -8,12 +8,14 @@ ; CHECK-NEXT: xscvdpspn 0, 1 ; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 1, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; 
CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: xxswapd 35, 1 +; CHECK-NEXT: lxvd2x 1, 0, 3 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: xvdivsp 0, 35, 0 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 1 ; CHECK-NEXT: xvmulsp 1, 34, 35 ; CHECK-NEXT: xvmulsp 34, 1, 0 ; CHECK-NEXT: blr @@ -30,16 +32,18 @@ ; CHECK-NEXT: xscvdpspn 0, 1 ; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 1, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l -; CHECK-NEXT: lvx 4, 0, 3 +; CHECK-NEXT: xxswapd 1, 1 ; CHECK-NEXT: xxspltw 0, 0, 0 -; CHECK-NEXT: xvresp 1, 0 -; CHECK-NEXT: xvmaddasp 35, 0, 1 -; CHECK-NEXT: xvmulsp 0, 34, 36 -; CHECK-NEXT: xvnmsubasp 1, 1, 35 -; CHECK-NEXT: xvmulsp 34, 0, 1 +; CHECK-NEXT: xvresp 2, 0 +; CHECK-NEXT: xvmaddasp 1, 0, 2 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 +; CHECK-NEXT: xvnmsubasp 2, 2, 1 +; CHECK-NEXT: xvmulsp 0, 34, 35 +; CHECK-NEXT: xvmulsp 34, 0, 2 ; CHECK-NEXT: blr %ins = insertelement <4 x float> undef, float %a, i32 0 %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll --- a/llvm/test/CodeGen/PowerPC/sat-add.ll +++ b/llvm/test/CodeGen/PowerPC/sat-add.ll @@ -379,11 +379,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI24_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI24_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI24_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI24_1@toc@l +; CHECK-NEXT: xxswapd 35, 0 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vminub 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vaddubm 2, 2, 3 ; CHECK-NEXT: blr %c = icmp ult <16 x i8> %x, @@ -397,7 +399,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI25_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 
+; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vaddubs 2, 2, 3 ; CHECK-NEXT: blr %a = add <16 x i8> %x, @@ -411,7 +414,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI26_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI26_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vaddubs 2, 2, 3 ; CHECK-NEXT: blr %a = add <16 x i8> %x, @@ -425,11 +429,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI27_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI27_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI27_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI27_1@toc@l +; CHECK-NEXT: xxswapd 35, 0 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vminuh 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduhm 2, 2, 3 ; CHECK-NEXT: blr %c = icmp ult <8 x i16> %x, @@ -443,7 +449,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI28_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI28_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduhs 2, 2, 3 ; CHECK-NEXT: blr %a = add <8 x i16> %x, @@ -457,7 +464,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI29_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI29_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduhs 2, 2, 3 ; CHECK-NEXT: blr %a = add <8 x i16> %x, @@ -471,11 +479,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI30_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI30_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI30_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI30_1@toc@l +; CHECK-NEXT: xxswapd 35, 0 +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vminuw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = icmp ult <4 x i32> %x, @@ -489,7 +499,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 
2, .LCPI31_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI31_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduws 2, 2, 3 ; CHECK-NEXT: blr %a = add <4 x i32> %x, @@ -503,7 +514,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis 3, 2, .LCPI32_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI32_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduws 2, 2, 3 ; CHECK-NEXT: blr %a = add <4 x i32> %x, diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -40,10 +40,11 @@ ; P8LE-LABEL: s2v_test1: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l -; P8LE-NEXT: lvx v3, 0, r4 -; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v4, vs0 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; ; P8BE-LABEL: s2v_test1: @@ -107,9 +108,10 @@ ; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha ; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l -; P8LE-NEXT: lxsiwzx v4, 0, r3 -; P8LE-NEXT: lvx v3, 0, r4 -; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v4, vs0 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; ; P8BE-LABEL: s2v_test2: @@ -176,10 +178,11 @@ ; P8LE-LABEL: s2v_test3: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; P8LE-NEXT: sldi r5, r7, 2 ; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l -; P8LE-NEXT: lxsiwzx v3, r3, r5 -; P8LE-NEXT: lvx v4, 0, r4 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: lxsiwzx v3, r3, r4 +; P8LE-NEXT: xxswapd v4, vs0 ; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; @@ -258,9 +261,10 @@ ; 
P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha ; P8LE-NEXT: addi r3, r3, 4 ; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l -; P8LE-NEXT: lxsiwzx v4, 0, r3 -; P8LE-NEXT: lvx v3, 0, r4 -; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v4, vs0 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; ; P8BE-LABEL: s2v_test4: @@ -325,10 +329,11 @@ ; P8LE-LABEL: s2v_test5: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P8LE-NEXT: lxsiwzx v4, 0, r5 +; P8LE-NEXT: lxsiwzx v3, 0, r5 ; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P8LE-NEXT: lvx v3, 0, r3 -; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v4, vs0 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; ; P8BE-LABEL: s2v_test5: @@ -390,10 +395,11 @@ ; P8LE-LABEL: s2v_test_f1: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; P8LE-NEXT: lxsiwzx v4, 0, r3 +; P8LE-NEXT: lxsiwzx v3, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; P8LE-NEXT: lvx v3, 0, r4 -; P8LE-NEXT: vperm v2, v2, v4, v3 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v4, vs0 +; P8LE-NEXT: vperm v2, v2, v3, v4 ; P8LE-NEXT: blr ; ; P8BE-LABEL: s2v_test_f1: diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -33,7 +33,8 @@ ; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l ; CHECK-NEXT: vcmpgtsw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vsubuwm 2, 3, 2 ; CHECK-NEXT: blr %c = icmp sgt <4 x i32> %x, @@ -84,7 +85,8 @@ ; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l ; CHECK-NEXT: vcmpgtsw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = icmp 
sgt <4 x i32> %x, @@ -192,9 +194,10 @@ ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vsubuwm 3, 4, 3 ; CHECK-NEXT: vsraw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = xor <4 x i32> %x, @@ -222,9 +225,10 @@ ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vsubuwm 3, 4, 3 ; CHECK-NEXT: vsrw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = xor <4 x i32> %x, @@ -276,9 +280,10 @@ ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vsubuwm 3, 4, 3 ; CHECK-NEXT: vsraw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %sh = lshr <4 x i32> %x, diff --git a/llvm/test/CodeGen/PowerPC/store_fptoi.ll b/llvm/test/CodeGen/PowerPC/store_fptoi.ll --- a/llvm/test/CodeGen/PowerPC/store_fptoi.ll +++ b/llvm/test/CodeGen/PowerPC/store_fptoi.ll @@ -26,8 +26,9 @@ ; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std 0, 16(1) ; CHECK-PWR8-NEXT: stdu 1, -48(1) -; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3 ; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: xxswapd 2, 0 ; CHECK-PWR8-NEXT: bl __fixkfdi ; CHECK-PWR8-NEXT: nop ; CHECK-PWR8-NEXT: std 3, 0(30) @@ -63,8 +64,9 @@ ; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std 0, 16(1) ; CHECK-PWR8-NEXT: stdu 1, -48(1) -; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3 ; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: xxswapd 2, 0 ; CHECK-PWR8-NEXT: bl __fixkfsi ; CHECK-PWR8-NEXT: nop ; CHECK-PWR8-NEXT: stw 3, 0(30) @@ -100,8 +102,9 @@ ; 
CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std 0, 16(1) ; CHECK-PWR8-NEXT: stdu 1, -48(1) -; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3 ; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: xxswapd 2, 0 ; CHECK-PWR8-NEXT: bl __fixunskfdi ; CHECK-PWR8-NEXT: nop ; CHECK-PWR8-NEXT: std 3, 0(30) @@ -137,8 +140,9 @@ ; CHECK-PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std 0, 16(1) ; CHECK-PWR8-NEXT: stdu 1, -48(1) -; CHECK-PWR8-NEXT: lvx 2, 0, 3 +; CHECK-PWR8-NEXT: lxvd2x 0, 0, 3 ; CHECK-PWR8-NEXT: mr 30, 4 +; CHECK-PWR8-NEXT: xxswapd 2, 0 ; CHECK-PWR8-NEXT: bl __fixunskfsi ; CHECK-PWR8-NEXT: nop ; CHECK-PWR8-NEXT: stw 3, 0(30) diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll --- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll +++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll @@ -27,21 +27,24 @@ ; CHECK-LE-P7-NEXT: addi r3, r1, -4 ; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha ; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI0_0@toc@l -; CHECK-LE-P7-NEXT: lvx v3, 0, r4 ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: lwz r3, -4(r1) ; CHECK-LE-P7-NEXT: stw r3, -32(r1) ; CHECK-LE-P7-NEXT: addi r3, r1, -32 -; CHECK-LE-P7-NEXT: lvx v4, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v3, vs0 +; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v4, vs1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; ; CHECK-LE-P8-LABEL: test: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; @@ -93,21 +96,24 @@ ; CHECK-LE-P7-NEXT: addi r3, r1, -4 ; CHECK-LE-P7-NEXT: 
addis r4, r2, .LCPI1_0@toc@ha ; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI1_0@toc@l -; CHECK-LE-P7-NEXT: lvx v3, 0, r4 ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: lwz r3, -4(r1) ; CHECK-LE-P7-NEXT: stw r3, -32(r1) ; CHECK-LE-P7-NEXT: addi r3, r1, -32 -; CHECK-LE-P7-NEXT: lvx v4, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v3, vs0 +; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v4, vs1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; ; CHECK-LE-P8-LABEL: test2: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; @@ -159,21 +165,24 @@ ; CHECK-LE-P7-NEXT: addi r3, r1, -4 ; CHECK-LE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha ; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-LE-P7-NEXT: lvx v3, 0, r4 ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: lwz r3, -4(r1) ; CHECK-LE-P7-NEXT: stw r3, -32(r1) ; CHECK-LE-P7-NEXT: addi r3, r1, -32 -; CHECK-LE-P7-NEXT: lvx v4, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v3, vs0 +; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v4, vs1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; ; CHECK-LE-P8-LABEL: test3: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; @@ -225,21 +234,24 @@ ; CHECK-LE-P7-NEXT: addi r3, r1, -4 ; 
CHECK-LE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha ; CHECK-LE-P7-NEXT: addi r4, r4, .LCPI3_0@toc@l -; CHECK-LE-P7-NEXT: lvx v3, 0, r4 ; CHECK-LE-P7-NEXT: stfiwx f0, 0, r3 +; CHECK-LE-P7-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P7-NEXT: lwz r3, -4(r1) ; CHECK-LE-P7-NEXT: stw r3, -32(r1) ; CHECK-LE-P7-NEXT: addi r3, r1, -32 -; CHECK-LE-P7-NEXT: lvx v4, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v3, vs0 +; CHECK-LE-P7-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P7-NEXT: xxswapd v4, vs1 ; CHECK-LE-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P7-NEXT: blr ; ; CHECK-LE-P8-LABEL: test4: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll --- a/llvm/test/CodeGen/PowerPC/toc-float.ll +++ b/llvm/test/CodeGen/PowerPC/toc-float.ll @@ -150,7 +150,8 @@ ; CHECK-P8-NEXT: addis 3, 2, .LC0@toc@ha ; CHECK-P8-NEXT: ld 3, .LC0@toc@l(3) ; CHECK-P8-NEXT: addi 3, 3, 32 -; CHECK-P8-NEXT: lvx 2, 0, 3 +; CHECK-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-P8-NEXT: xxswapd 34, 0 ; CHECK-P8-NEXT: blr entry: %0 = load <4 x i32>, <4 x i32>* getelementptr inbounds ([10 x <4 x i32>], [10 x <4 x i32>]* @vec_arr, i64 0, i64 2), align 16 diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll @@ -163,36 +163,42 @@ ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: addis 6, 2, .LCPI4_0@toc@ha ; PPC64LE-NEXT: mtfprwz 0, 3 -; PPC64LE-NEXT: mtfprwz 1, 4 -; PPC64LE-NEXT: addi 3, 6, .LCPI4_0@toc@l -; PPC64LE-NEXT: addis 4, 2, .LCPI4_2@toc@ha -; PPC64LE-NEXT: 
lvx 2, 0, 3 -; PPC64LE-NEXT: mtvsrwz 36, 5 ; PPC64LE-NEXT: addis 3, 2, .LCPI4_1@toc@ha -; PPC64LE-NEXT: xxmrghw 35, 1, 0 +; PPC64LE-NEXT: addi 6, 6, .LCPI4_0@toc@l +; PPC64LE-NEXT: mtfprwz 2, 4 ; PPC64LE-NEXT: addi 3, 3, .LCPI4_1@toc@l -; PPC64LE-NEXT: vperm 2, 4, 3, 2 -; PPC64LE-NEXT: vspltisw 3, -11 -; PPC64LE-NEXT: lvx 4, 0, 3 +; PPC64LE-NEXT: addis 4, 2, .LCPI4_2@toc@ha +; PPC64LE-NEXT: lxvd2x 1, 0, 6 +; PPC64LE-NEXT: mtvsrwz 36, 5 +; PPC64LE-NEXT: xxmrghw 34, 2, 0 +; PPC64LE-NEXT: lxvd2x 0, 0, 3 ; PPC64LE-NEXT: addi 3, 4, .LCPI4_2@toc@l ; PPC64LE-NEXT: addis 4, 2, .LCPI4_4@toc@ha -; PPC64LE-NEXT: lvx 5, 0, 3 -; PPC64LE-NEXT: addis 3, 2, .LCPI4_3@toc@ha ; PPC64LE-NEXT: addi 4, 4, .LCPI4_4@toc@l +; PPC64LE-NEXT: xxswapd 35, 1 +; PPC64LE-NEXT: lxvd2x 1, 0, 3 +; PPC64LE-NEXT: addis 3, 2, .LCPI4_3@toc@ha ; PPC64LE-NEXT: addi 3, 3, .LCPI4_3@toc@l -; PPC64LE-NEXT: vsrw 3, 3, 3 -; PPC64LE-NEXT: vsubuwm 2, 2, 4 -; PPC64LE-NEXT: lvx 4, 0, 3 +; PPC64LE-NEXT: vperm 2, 4, 2, 3 +; PPC64LE-NEXT: vspltisw 3, -11 +; PPC64LE-NEXT: xxswapd 36, 0 +; PPC64LE-NEXT: xxswapd 37, 1 +; PPC64LE-NEXT: lxvd2x 0, 0, 3 +; PPC64LE-NEXT: lxvd2x 1, 0, 4 ; PPC64LE-NEXT: addis 3, 2, .LCPI4_5@toc@ha ; PPC64LE-NEXT: addi 3, 3, .LCPI4_5@toc@l +; PPC64LE-NEXT: vsrw 3, 3, 3 +; PPC64LE-NEXT: vsubuwm 2, 2, 4 +; PPC64LE-NEXT: xxswapd 36, 0 +; PPC64LE-NEXT: lxvd2x 0, 0, 3 ; PPC64LE-NEXT: vmuluwm 2, 2, 5 -; PPC64LE-NEXT: lvx 5, 0, 4 +; PPC64LE-NEXT: xxswapd 37, 1 ; PPC64LE-NEXT: xxland 32, 34, 35 ; PPC64LE-NEXT: vslw 2, 2, 4 ; PPC64LE-NEXT: vsrw 4, 0, 5 -; PPC64LE-NEXT: xxlor 0, 36, 34 -; PPC64LE-NEXT: lvx 2, 0, 3 -; PPC64LE-NEXT: xxland 35, 0, 35 +; PPC64LE-NEXT: xxlor 1, 36, 34 +; PPC64LE-NEXT: xxswapd 34, 0 +; PPC64LE-NEXT: xxland 35, 1, 35 ; PPC64LE-NEXT: vcmpgtuw 2, 3, 2 ; PPC64LE-NEXT: xxswapd 0, 34 ; PPC64LE-NEXT: xxsldwi 1, 34, 34, 1 diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll --- a/llvm/test/CodeGen/PowerPC/vavg.ll +++ b/llvm/test/CodeGen/PowerPC/vavg.ll @@ 
-150,22 +150,24 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; CHECK-P8-NEXT: vadduhm 2, 2, 3 -; CHECK-P8-NEXT: vspltish 4, 1 +; CHECK-P8-NEXT: vspltish 3, 1 ; CHECK-P8-NEXT: addi 3, 3, .LCPI6_0@toc@l -; CHECK-P8-NEXT: lvx 3, 0, 3 -; CHECK-P8-NEXT: vadduhm 2, 2, 3 -; CHECK-P8-NEXT: vsrah 2, 2, 4 +; CHECK-P8-NEXT: lxvd2x 0, 0, 3 +; CHECK-P8-NEXT: xxswapd 36, 0 +; CHECK-P8-NEXT: vadduhm 2, 2, 4 +; CHECK-P8-NEXT: vsrah 2, 2, 3 ; CHECK-P8-NEXT: blr ; ; CHECK-P7-LABEL: test_v8i16_sign_negative: ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: addis 3, 2, .LCPI6_0@toc@ha ; CHECK-P7-NEXT: vadduhm 2, 2, 3 -; CHECK-P7-NEXT: vspltish 4, 1 +; CHECK-P7-NEXT: vspltish 3, 1 ; CHECK-P7-NEXT: addi 3, 3, .LCPI6_0@toc@l -; CHECK-P7-NEXT: lvx 3, 0, 3 -; CHECK-P7-NEXT: vadduhm 2, 2, 3 -; CHECK-P7-NEXT: vsrah 2, 2, 4 +; CHECK-P7-NEXT: lxvd2x 0, 0, 3 +; CHECK-P7-NEXT: xxswapd 36, 0 +; CHECK-P7-NEXT: vadduhm 2, 2, 4 +; CHECK-P7-NEXT: vsrah 2, 2, 3 ; CHECK-P7-NEXT: blr entry: %add = add <8 x i16> %m, diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll --- a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll +++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll @@ -24,12 +24,14 @@ ; CHECK_LE-NEXT: xxswapd 35, 34 ; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK_LE-NEXT: vcmpequw 2, 2, 3 -; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: lxvd2x 0, 0, 3 ; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: xxswapd 35, 0 ; CHECK_LE-NEXT: vperm 3, 2, 2, 3 -; CHECK_LE-NEXT: xxland 34, 35, 34 -; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: xxland 0, 35, 34 +; CHECK_LE-NEXT: xxswapd 0, 0 +; CHECK_LE-NEXT: stxvd2x 0, 0, 3 ; CHECK_LE-NEXT: ld 3, -16(1) ; CHECK_LE-NEXT: blr ; @@ -66,12 +68,14 @@ ; CHECK_LE-NEXT: xxswapd 35, 34 ; CHECK_LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK_LE-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK_LE-NEXT: 
vcmpequw 2, 2, 3 -; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: lxvd2x 0, 0, 3 ; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: xxswapd 35, 0 ; CHECK_LE-NEXT: vperm 3, 2, 2, 3 -; CHECK_LE-NEXT: xxland 34, 35, 34 -; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: xxland 0, 35, 34 +; CHECK_LE-NEXT: xxswapd 0, 0 +; CHECK_LE-NEXT: stxvd2x 0, 0, 3 ; CHECK_LE-NEXT: ld 3, -16(1) ; CHECK_LE-NEXT: blr ; @@ -101,10 +105,11 @@ ; ; CHECK_LE-LABEL: cmpeq: ; CHECK_LE: # %bb.0: # %entry -; CHECK_LE-NEXT: vcmpequw 2, 2, 3 ; CHECK_LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 ; CHECK_LE-NEXT: addi 3, 3, .LCPI2_0@toc@l -; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: lxvd2x 0, 0, 3 +; CHECK_LE-NEXT: xxswapd 35, 0 ; CHECK_LE-NEXT: vperm 3, 2, 2, 3 ; CHECK_LE-NEXT: xxland 34, 35, 34 ; CHECK_LE-NEXT: blr @@ -132,10 +137,11 @@ ; ; CHECK_LE-LABEL: cmpne: ; CHECK_LE: # %bb.0: # %entry -; CHECK_LE-NEXT: vcmpequw 2, 2, 3 ; CHECK_LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 ; CHECK_LE-NEXT: addi 3, 3, .LCPI3_0@toc@l -; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: lxvd2x 0, 0, 3 +; CHECK_LE-NEXT: xxswapd 35, 0 ; CHECK_LE-NEXT: xxlnor 34, 34, 34 ; CHECK_LE-NEXT: vperm 3, 2, 2, 3 ; CHECK_LE-NEXT: xxlor 34, 35, 34 diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll --- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -14,31 +14,36 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI0_2@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI0_1@toc@ha +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI0_3@toc@ha ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI0_2@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI0_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; 
CHECK-P8-NEXT: addis r5, r2, .LCPI0_3@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r6 -; CHECK-P8-NEXT: lvx v1, 0, r4 +; CHECK-P8-NEXT: addi r4, r4, .LCPI0_3@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI0_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: addi r5, r5, .LCPI0_3@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI0_1@toc@ha +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: addi r5, r5, .LCPI0_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs4, 0, r5 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 -; CHECK-P8-NEXT: vperm v5, v4, v3, v5 -; CHECK-P8-NEXT: vperm v0, v4, v3, v0 -; CHECK-P8-NEXT: vperm v3, v4, v3, v1 -; CHECK-P8-NEXT: xvcvuxddp vs0, v2 -; CHECK-P8-NEXT: xvcvuxddp vs1, v5 +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: xxswapd v1, vs4 +; CHECK-P8-NEXT: vperm v0, v4, v2, v0 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v5, v4, v2, v5 +; CHECK-P8-NEXT: vperm v2, v4, v2, v1 ; CHECK-P8-NEXT: xvcvuxddp vs2, v0 -; CHECK-P8-NEXT: xvcvuxddp vs3, v3 +; CHECK-P8-NEXT: xvcvuxddp vs0, v3 +; CHECK-P8-NEXT: xvcvuxddp vs1, v5 +; CHECK-P8-NEXT: xvcvuxddp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 16 @@ -118,17 +123,20 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r5 -; CHECK-P8-NEXT: vperm v2, 
v4, v3, v2 -; CHECK-P8-NEXT: vperm v3, v4, v3, v5 -; CHECK-P8-NEXT: xvcvuxddp vs0, v2 -; CHECK-P8-NEXT: xvcvuxddp vs1, v3 +; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v2, v4, v2, v5 +; CHECK-P8-NEXT: xvcvuxddp vs0, v3 +; CHECK-P8-NEXT: xvcvuxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 @@ -181,11 +189,13 @@ ; CHECK-P8-LABEL: test2: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxddp vs0, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 @@ -226,37 +236,42 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI3_1@toc@ha +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l +; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI3_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI3_1@toc@ha +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI3_4@toc@ha +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_1@toc@l -; 
CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI3_3@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r6 -; CHECK-P8-NEXT: addis r6, r2, .LCPI3_4@toc@ha +; CHECK-P8-NEXT: addi r5, r5, .LCPI3_4@toc@l ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: addi r5, r5, .LCPI3_3@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r5 -; CHECK-P8-NEXT: addi r5, r6, .LCPI3_4@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r5 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs4, 0, r5 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: vperm v4, v3, v3, v4 -; CHECK-P8-NEXT: vperm v5, v3, v3, v5 -; CHECK-P8-NEXT: vperm v3, v3, v3, v0 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: xxswapd v0, vs4 +; CHECK-P8-NEXT: vperm v3, v2, v2, v3 +; CHECK-P8-NEXT: vperm v4, v2, v2, v4 +; CHECK-P8-NEXT: vperm v5, v2, v2, v5 +; CHECK-P8-NEXT: vperm v2, v2, v2, v0 ; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vsld v2, v2, v0 +; CHECK-P8-NEXT: vsld v3, v3, v0 ; CHECK-P8-NEXT: vsld v4, v4, v0 ; CHECK-P8-NEXT: vsld v5, v5, v0 -; CHECK-P8-NEXT: vsld v3, v3, v0 -; CHECK-P8-NEXT: vsrad v2, v2, v0 +; CHECK-P8-NEXT: vsld v2, v2, v0 ; CHECK-P8-NEXT: vsrad v3, v3, v0 +; CHECK-P8-NEXT: vsrad v2, v2, v0 ; CHECK-P8-NEXT: vsrad v4, v4, v0 ; CHECK-P8-NEXT: vsrad v5, v5, v0 -; CHECK-P8-NEXT: xvcvsxddp vs2, v3 -; CHECK-P8-NEXT: xvcvsxddp vs0, v2 +; CHECK-P8-NEXT: xvcvsxddp vs2, v2 +; CHECK-P8-NEXT: xvcvsxddp vs0, v3 ; CHECK-P8-NEXT: xvcvsxddp vs1, v5 ; CHECK-P8-NEXT: xvcvsxddp vs3, v4 ; CHECK-P8-NEXT: xxswapd vs2, vs2 @@ -347,24 +362,27 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI4_2@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-P8-NEXT: addi r4, r6, .LCPI4_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: 
lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: addi r5, r6, .LCPI4_2@toc@l ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v4, 0, r5 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 -; CHECK-P8-NEXT: vperm v3, v3, v3, v4 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: vperm v3, v2, v2, v3 +; CHECK-P8-NEXT: vperm v2, v2, v2, v4 ; CHECK-P8-NEXT: xxswapd v4, vs0 -; CHECK-P8-NEXT: vsld v2, v2, v4 ; CHECK-P8-NEXT: vsld v3, v3, v4 -; CHECK-P8-NEXT: vsrad v2, v2, v4 +; CHECK-P8-NEXT: vsld v2, v2, v4 ; CHECK-P8-NEXT: vsrad v3, v3, v4 -; CHECK-P8-NEXT: xvcvsxddp vs0, v2 -; CHECK-P8-NEXT: xvcvsxddp vs1, v3 +; CHECK-P8-NEXT: vsrad v2, v2, v4 +; CHECK-P8-NEXT: xvcvsxddp vs0, v3 +; CHECK-P8-NEXT: xvcvsxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 @@ -419,13 +437,15 @@ ; CHECK-P8-LABEL: stest2: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vsld v2, v2, v3 ; CHECK-P8-NEXT: vsrad v2, v2, v3 diff --git a/llvm/test/CodeGen/PowerPC/vec-trunc.ll b/llvm/test/CodeGen/PowerPC/vec-trunc.ll --- a/llvm/test/CodeGen/PowerPC/vec-trunc.ll +++ b/llvm/test/CodeGen/PowerPC/vec-trunc.ll @@ -9,7 +9,8 @@ define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) { ; 
CHECK-LABEL: test8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx v2, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 +; CHECK-NEXT: xxswapd v2, vs0 ; CHECK-NEXT: vpkuhum v2, v2, v2 ; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stfdx f0, 0, r3 @@ -34,7 +35,8 @@ define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) { ; CHECK-LABEL: test4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx v2, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 +; CHECK-NEXT: xxswapd v2, vs0 ; CHECK-NEXT: vpkuhum v2, v2, v2 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 2 ; CHECK-NEXT: stfiwx f0, 0, r3 @@ -60,10 +62,12 @@ ; CHECK-LABEL: test4i8w: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-NEXT: lvx v3, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 ; CHECK-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-NEXT: lvx v2, 0, r5 -; CHECK-NEXT: vperm v2, v3, v3, v2 +; CHECK-NEXT: lxvd2x vs1, 0, r5 +; CHECK-NEXT: xxswapd v2, vs0 +; CHECK-NEXT: xxswapd v3, vs1 +; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 2 ; CHECK-NEXT: stfiwx f0, 0, r3 ; CHECK-NEXT: blr @@ -90,7 +94,8 @@ define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) { ; CHECK-LABEL: test2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx v2, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 +; CHECK-NEXT: xxswapd v2, vs0 ; CHECK-NEXT: vpkuhum v2, v2, v2 ; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: mffprd r4, f0 @@ -117,7 +122,8 @@ define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) { ; CHECK-LABEL: test4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx v2, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 +; CHECK-NEXT: xxswapd v2, vs0 ; CHECK-NEXT: vpkuwum v2, v2, v2 ; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stfdx f0, 0, r3 @@ -142,7 +148,8 @@ define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly %SrcPtr) { ; CHECK-LABEL: test2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx v2, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 
0, r4 +; CHECK-NEXT: xxswapd v2, vs0 ; CHECK-NEXT: vpkuwum v2, v2, v2 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 2 ; CHECK-NEXT: stfiwx f0, 0, r3 @@ -167,11 +174,12 @@ define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly %SrcPtr) { ; CHECK-LABEL: test2i16d: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxvd2x vs0, 0, r4 ; CHECK-NEXT: addis r5, r2, .LCPI6_0@toc@ha -; CHECK-NEXT: addi r4, r5, .LCPI6_0@toc@l -; CHECK-NEXT: lvx v3, 0, r4 +; CHECK-NEXT: lxvd2x vs0, 0, r4 +; CHECK-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-NEXT: lxvd2x vs1, 0, r5 ; CHECK-NEXT: xxswapd v2, vs0 +; CHECK-NEXT: xxswapd v3, vs1 ; CHECK-NEXT: vperm v2, v2, v2, v3 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 2 ; CHECK-NEXT: stfiwx f0, 0, r3 diff --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll --- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll +++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll @@ -9,19 +9,20 @@ define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) { ; CHECK-LABEL: test8x32: ; CHECK: # %bb.0: +; CHECK-NEXT: addis r11, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: rldimi r3, r4, 32, 0 ; CHECK-NEXT: rldimi r5, r6, 32, 0 -; CHECK-NEXT: addis r11, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: rldimi r7, r8, 32, 0 -; CHECK-NEXT: rldimi r9, r10, 32, 0 ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: addi r3, r11, .LCPI0_0@toc@l +; CHECK-NEXT: rldimi r7, r8, 32, 0 +; CHECK-NEXT: rldimi r9, r10, 32, 0 +; CHECK-NEXT: lxvd2x vs3, 0, r3 ; CHECK-NEXT: mtfprd f1, r5 -; CHECK-NEXT: lvx v4, 0, r3 ; CHECK-NEXT: mtfprd f2, r7 -; CHECK-NEXT: mtfprd f3, r9 +; CHECK-NEXT: mtfprd f4, r9 ; CHECK-NEXT: xxmrghd v2, vs1, vs0 -; CHECK-NEXT: xxmrghd v3, vs3, vs2 +; CHECK-NEXT: xxswapd v4, vs3 +; CHECK-NEXT: xxmrghd v3, vs4, vs2 ; CHECK-NEXT: vperm v2, v3, v2, v4 ; CHECK-NEXT: blr ; @@ -79,13 +80,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addis r7, r2, .LCPI1_0@toc@ha ; CHECK-NEXT: mtfprd f0, r5 +; CHECK-NEXT: addi r5, r7, .LCPI1_0@toc@l ; CHECK-NEXT: 
mtfprd f1, r6 +; CHECK-NEXT: lxvd2x vs3, 0, r5 ; CHECK-NEXT: mtfprd f2, r3 -; CHECK-NEXT: addi r3, r7, .LCPI1_0@toc@l -; CHECK-NEXT: mtfprd f3, r4 +; CHECK-NEXT: mtfprd f4, r4 ; CHECK-NEXT: xxmrghd v2, vs1, vs0 -; CHECK-NEXT: lvx v4, 0, r3 -; CHECK-NEXT: xxmrghd v3, vs3, vs2 +; CHECK-NEXT: xxmrghd v3, vs4, vs2 +; CHECK-NEXT: xxswapd v4, vs3 ; CHECK-NEXT: vperm v2, v2, v3, v4 ; CHECK-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll --- a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll +++ b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll @@ -9,10 +9,11 @@ define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone { ; CHECK-LABEL: v2si64_cmp: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vperm 3, 2, 2, 3 ; CHECK-NEXT: xxland 34, 35, 34 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll --- a/llvm/test/CodeGen/PowerPC/vec_constants.ll +++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll @@ -21,17 +21,23 @@ ; ; LE-LABEL: test1: ; LE: # %bb.0: -; LE-NEXT: lvx 2, 0, 3 +; LE-NEXT: lxvd2x 0, 0, 3 +; LE-NEXT: xxswapd 34, 0 ; LE-NEXT: vspltisb 3, -1 ; LE-NEXT: vslw 3, 3, 3 -; LE-NEXT: xxland 34, 34, 35 -; LE-NEXT: stvx 2, 0, 3 -; LE-NEXT: lvx 2, 0, 4 -; LE-NEXT: xxlandc 34, 34, 35 -; LE-NEXT: stvx 2, 0, 4 -; LE-NEXT: lvx 2, 0, 5 -; LE-NEXT: xvabssp 34, 34 -; LE-NEXT: stvx 2, 0, 5 +; LE-NEXT: xxland 0, 34, 35 +; LE-NEXT: xxswapd 0, 0 +; LE-NEXT: stxvd2x 0, 0, 3 +; LE-NEXT: lxvd2x 0, 0, 4 +; LE-NEXT: xxswapd 34, 0 +; LE-NEXT: xxlandc 0, 34, 35 +; LE-NEXT: xxswapd 0, 0 +; LE-NEXT: stxvd2x 0, 0, 4 +; LE-NEXT: lxvd2x 0, 0, 5 +; LE-NEXT: xxswapd 34, 0 +; LE-NEXT: xvabssp 0, 34 +; LE-NEXT: xxswapd 0, 0 +; LE-NEXT: stxvd2x 0, 0, 5 ; LE-NEXT: blr 
%tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1] %tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -166,53 +166,53 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r4, f3 ; 
CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; CHECK-P8-NEXT: xscvdpsxws f0, f6 +; CHECK-P8-NEXT: xscvdpsxws f2, f7 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghh v2, v2, v4 +; CHECK-P8-NEXT: vmrghh v3, v3, v5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrghh v4, v4, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v4, v5 +; CHECK-P8-NEXT: vmrghh v4, v4, v0 +; CHECK-P8-NEXT: vmrghh v5, v5, v1 +; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; @@ -329,105 +329,107 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v3, r4, r5 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 +; CHECK-P8-NEXT: lxvd2x 
vs2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v5 -; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvspdpn f4, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f7, v2 +; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f5, vs3 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 ; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v2, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 ; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f10, v4 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xscvdpsxws f5, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f3, v0 ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz 
r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f8 -; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: vmrghh v2, v2, v1 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: vmrghh v3, v3, v1 ; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd vs5, v2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: vmrghh v3, v0, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f1, vs5 -; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 -; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f10 +; CHECK-P8-NEXT: vmrghh v4, v5, v1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v5, v1 -; CHECK-P8-NEXT: vmrghh v5, v6, v0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f1, f4 ; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; 
CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f6 +; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: xscvspdpn f0, vs2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f7 ; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v0, v0, v7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v6, v6, v8 ; CHECK-P8-NEXT: mtvsrd v8, r4 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: xxmrglw vs1, v0, v5 +; CHECK-P8-NEXT: vmrghh v5, v5, v9 ; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v7, v8, v7 -; CHECK-P8-NEXT: vmrghh v6, v6, v9 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: vmrghh v7, v7, v8 +; CHECK-P8-NEXT: xxmrglw vs1, v6, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: vmrghh v0, v0, v8 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v1, v4 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrglw vs3, v6, v7 +; CHECK-P8-NEXT: xxmrglw vs2, v7, v5 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxmrglw vs3, v0, v1 ; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -796,53 +798,53 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: 
test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; 
CHECK-P8-NEXT: xscvdpsxws f0, f6 +; CHECK-P8-NEXT: xscvdpsxws f2, f7 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghh v2, v2, v4 +; CHECK-P8-NEXT: vmrghh v3, v3, v5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: vmrghh v4, v4, v1 -; CHECK-P8-NEXT: xxmrglw vs1, v4, v5 +; CHECK-P8-NEXT: vmrghh v4, v4, v0 +; CHECK-P8-NEXT: vmrghh v5, v5, v1 +; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: blr ; @@ -959,105 +961,107 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v3, r4, r5 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f1, v5 -; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: 
xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvspdpn f4, vs1 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: xxsldwi vs6, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f7, v2 +; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f5, vs3 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 ; CHECK-P8-NEXT: xscvspdpn f2, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v2, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 ; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f10, v4 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xscvdpsxws f5, f7 -; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f3, v0 ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f8 -; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 3 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: vmrghh v2, v2, v1 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r4, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v4, v4, 1 ; CHECK-P8-NEXT: xscvdpsxws 
f2, f2 +; CHECK-P8-NEXT: vmrghh v3, v3, v1 ; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: xxswapd vs5, v2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: vmrghh v3, v0, v3 -; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f1, vs5 -; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 -; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: xscvdpsxws f1, f10 +; CHECK-P8-NEXT: vmrghh v4, v5, v1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v0, v0, 1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghh v2, v5, v1 -; CHECK-P8-NEXT: vmrghh v5, v6, v0 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f2, f3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f1, f4 ; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xscvspdpn f1, vs2 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f3, f6 +; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: xscvspdpn f0, vs2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f7 ; CHECK-P8-NEXT: mtvsrd 
v9, r4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: vmrghh v0, v0, v7 -; CHECK-P8-NEXT: mtvsrd v7, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 -; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v6, v6, v8 ; CHECK-P8-NEXT: mtvsrd v8, r4 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: vmrghh v1, v1, v9 -; CHECK-P8-NEXT: xxmrglw vs1, v0, v5 +; CHECK-P8-NEXT: vmrghh v5, v5, v9 ; CHECK-P8-NEXT: mtvsrd v9, r4 -; CHECK-P8-NEXT: vmrghh v7, v8, v7 -; CHECK-P8-NEXT: vmrghh v6, v6, v9 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 +; CHECK-P8-NEXT: vmrghh v7, v7, v8 +; CHECK-P8-NEXT: xxmrglw vs1, v6, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: vmrghh v0, v0, v8 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 -; CHECK-P8-NEXT: xxmrglw vs2, v1, v4 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: xxmrglw vs3, v6, v7 +; CHECK-P8-NEXT: xxmrglw vs2, v7, v5 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxmrglw vs3, v0, v1 ; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll @@ -90,10 +90,12 @@ ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 ; 
CHECK-P8-NEXT: xxmrghw vs3, v3, v3 ; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 @@ -173,33 +175,37 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxmrghw vs3, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs1, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs2, v2, v2 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs5, v4, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 -; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 -; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v3 ; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 ; CHECK-P8-NEXT: xxmrglw vs6, v2, v2 ; CHECK-P8-NEXT: xxmrghw vs7, v2, v2 ; CHECK-P8-NEXT: xvcvspdp vs5, vs5 -; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs1, vs1 ; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs4, vs4 ; CHECK-P8-NEXT: xvcvspdp vs6, vs6 ; CHECK-P8-NEXT: xvcvspdp vs7, vs7 ; CHECK-P8-NEXT: xvcvdpuxds v3, vs3 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs5 -; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v2, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v0, vs4 -; CHECK-P8-NEXT: xvcvdpuxds v1, vs2 +; CHECK-P8-NEXT: xvcvdpuxds v1, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v6, vs6 ; CHECK-P8-NEXT: xxswapd vs0, v3 ; CHECK-P8-NEXT: xvcvdpuxds v7, vs7 @@ -389,10 +395,12 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; 
CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 ; CHECK-P8-NEXT: xxmrghw vs3, v3, v3 ; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 @@ -472,33 +480,37 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxmrghw vs3, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs1, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs2, v2, v2 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs5, v4, v4 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 -; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 -; CHECK-P8-NEXT: lvx v2, 0, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 -; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v3 ; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 ; CHECK-P8-NEXT: xxmrglw vs6, v2, v2 ; CHECK-P8-NEXT: xxmrghw vs7, v2, v2 ; CHECK-P8-NEXT: xvcvspdp vs5, vs5 -; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs1, vs1 ; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs4, vs4 ; CHECK-P8-NEXT: xvcvspdp vs6, vs6 ; CHECK-P8-NEXT: xvcvspdp vs7, vs7 ; CHECK-P8-NEXT: xvcvdpuxds v3, vs3 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs5 -; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v2, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v0, vs4 -; CHECK-P8-NEXT: xvcvdpuxds v1, vs2 +; CHECK-P8-NEXT: 
xvcvdpuxds v1, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v6, vs6 ; CHECK-P8-NEXT: xxswapd vs0, v3 ; CHECK-P8-NEXT: xvcvdpuxds v7, vs7 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -175,53 +175,53 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; 
CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: xscvdpsxws f0, f6 +; CHECK-P8-NEXT: xscvdpsxws f2, f7 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghb v2, v2, v4 +; CHECK-P8-NEXT: vmrghb v3, v3, v5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v4, v0 +; CHECK-P8-NEXT: vmrghb v5, v5, v1 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 +; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -343,102 +343,102 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v4, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: lvx v2, r3, r5 -; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 -; CHECK-P8-NEXT: xxswapd vs2, v4 -; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f1, v4 
-; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: lxvd2x vs4, r3, r4 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xscvspdpn f5, vs2 +; CHECK-P8-NEXT: xxswapd v5, vs3 +; CHECK-P8-NEXT: xscvspdpn f6, vs3 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f8, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v5 +; CHECK-P8-NEXT: xxswapd v7, vs4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f8, f8 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r5, f2 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f5 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: 
xscvspdpn f1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v0, r3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: xxswapd vs4, v0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 +; CHECK-P8-NEXT: vmrghb v2, v4, v0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f9 +; CHECK-P8-NEXT: mtvsrd v4, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f6, f9 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f7, vs4 +; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: xscvdpsxws f5, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, v7 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v4, 
v5, v4 -; CHECK-P8-NEXT: mtvsrd v5, r5 -; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrghb v5, v0, v8 +; CHECK-P8-NEXT: mtvsrd v0, r3 ; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f6 +; CHECK-P8-NEXT: vmrghb v3, v3, v6 ; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: vmrghb v5, v5, v7 -; CHECK-P8-NEXT: vmrghb v1, v1, v6 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r3, f7 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: vmrghb v0, v0, v6 +; CHECK-P8-NEXT: vmrghb v1, v1, v7 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mtvsrd v9, r3 -; CHECK-P8-NEXT: vmrghb v7, v8, v7 -; CHECK-P8-NEXT: vmrghb v6, v6, v9 -; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v6, v6, v8 +; CHECK-P8-NEXT: vmrghb v7, v7, v9 +; CHECK-P8-NEXT: vmrglh v2, v4, v2 +; CHECK-P8-NEXT: vmrglh v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v6, v7 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 ; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: blr @@ -815,53 +815,53 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; 
CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: lxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxsldwi vs2, v2, v2, 3 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v2 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 ; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: mffprwz r3, f2 -; CHECK-P8-NEXT: xscvdpsxws f2, f4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f4, f5 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v4, v2 -; CHECK-P8-NEXT: mffprwz r4, f2 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mtvsrd v3, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: mtvsrd v4, r4 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: 
xscvdpsxws f0, f6 +; CHECK-P8-NEXT: xscvdpsxws f2, f7 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: mtvsrd v4, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: mtvsrd v0, r4 -; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v5, v0, v5 -; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: vmrghb v2, v2, v4 +; CHECK-P8-NEXT: vmrghb v3, v3, v5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v4, v0 +; CHECK-P8-NEXT: vmrghb v5, v5, v1 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrghb v4, v4, v1 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 +; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -983,102 +983,102 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v4, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: lvx v2, r3, r5 -; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 -; CHECK-P8-NEXT: xxswapd vs2, v4 -; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f1, v4 -; CHECK-P8-NEXT: xscvspdpn f3, v3 -; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 +; CHECK-P8-NEXT: lxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: li r4, 32 +; CHECK-P8-NEXT: lxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: lxvd2x vs4, r3, r4 +; CHECK-P8-NEXT: xxswapd v3, vs2 +; CHECK-P8-NEXT: xscvspdpn f5, vs2 +; CHECK-P8-NEXT: xxswapd v5, vs3 +; CHECK-P8-NEXT: xscvspdpn f6, 
vs3 +; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f8, v2 +; CHECK-P8-NEXT: xxsldwi vs2, v3, v3, 3 +; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f3, v5 +; CHECK-P8-NEXT: xxswapd v7, vs4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 -; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: xscvdpsxws f8, f8 +; CHECK-P8-NEXT: xscvspdpn f9, vs9 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mffprwz r5, f2 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mffprwz r3, f5 ; CHECK-P8-NEXT: mffprwz r4, f1 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: vmrghb v3, v4, v3 -; CHECK-P8-NEXT: mtvsrd v4, r5 -; CHECK-P8-NEXT: mffprwz r5, f3 -; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvspdpn f1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd v5, r4 -; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v0, r3, r4 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f5, v2 -; CHECK-P8-NEXT: mffprwz r4, f3 -; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 -; CHECK-P8-NEXT: mtvsrd v1, r3 -; CHECK-P8-NEXT: mffprwz r3, f4 -; CHECK-P8-NEXT: xxswapd 
vs4, v0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 +; CHECK-P8-NEXT: vmrghb v2, v4, v0 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvdpsxws f5, f9 +; CHECK-P8-NEXT: mtvsrd v4, r3 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v0 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f6, f9 -; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f7, vs4 +; CHECK-P8-NEXT: xxsldwi vs4, v7, v7, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 1 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: xscvdpsxws f5, f6 +; CHECK-P8-NEXT: xxsldwi vs6, v7, v7, 1 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xscvspdpn f1, v7 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f6 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: vmrghb v2, v6, v1 -; CHECK-P8-NEXT: mtvsrd v1, r4 -; CHECK-P8-NEXT: mffprwz r4, f5 -; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: vmrghb v4, v5, v4 -; CHECK-P8-NEXT: mtvsrd v5, r5 -; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrghb v5, v0, v8 +; CHECK-P8-NEXT: mtvsrd v0, r3 ; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; 
CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f6 +; CHECK-P8-NEXT: vmrghb v3, v3, v6 ; CHECK-P8-NEXT: mtvsrd v6, r3 -; CHECK-P8-NEXT: mffprwz r3, f3 -; CHECK-P8-NEXT: vmrghb v5, v5, v7 -; CHECK-P8-NEXT: vmrghb v1, v1, v6 -; CHECK-P8-NEXT: mtvsrd v6, r4 -; CHECK-P8-NEXT: mffprwz r4, f4 -; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mffprwz r3, f7 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: vmrghb v0, v0, v6 +; CHECK-P8-NEXT: vmrghb v1, v1, v7 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mffprwz r3, f0 -; CHECK-P8-NEXT: mtvsrd v8, r4 -; CHECK-P8-NEXT: mtvsrd v9, r3 -; CHECK-P8-NEXT: vmrghb v7, v8, v7 -; CHECK-P8-NEXT: vmrghb v6, v6, v9 -; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v6, v6, v8 +; CHECK-P8-NEXT: vmrghb v7, v7, v9 +; CHECK-P8-NEXT: vmrglh v2, v4, v2 +; CHECK-P8-NEXT: vmrglh v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v4, v1, v0 -; CHECK-P8-NEXT: vmrglh v5, v6, v7 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v3 +; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: xxmrglw vs0, v3, v2 ; CHECK-P8-NEXT: xxmrglw vs1, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -371,10 +371,12 @@ ; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: xxmrglw vs2, v1, v0 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: xxswapd vs1, v2 ; CHECK-P8-NEXT: xxmrglw vs3, v7, v6 ; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; 
CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -916,10 +918,12 @@ ; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: xxmrgld v2, vs1, vs0 ; CHECK-P8-NEXT: xxmrglw vs2, v1, v0 -; CHECK-P8-NEXT: stvx v2, 0, r3 +; CHECK-P8-NEXT: xxswapd vs1, v2 ; CHECK-P8-NEXT: xxmrglw vs3, v7, v6 ; CHECK-P8-NEXT: xxmrgld v3, vs3, vs2 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -95,22 +95,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3 -; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3 +; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1 +; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1 +; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2 +; CHECK-P8-NEXT: xxmrgld vs2, vs3, vs2 ; CHECK-P8-NEXT: xvcvdpuxws v2, vs4 ; CHECK-P8-NEXT: xvcvdpuxws v3, vs0 ; CHECK-P8-NEXT: xvcvdpuxws v4, vs1 ; CHECK-P8-NEXT: xvcvdpuxws v5, vs2 ; CHECK-P8-NEXT: vmrgew v2, v3, v2 ; CHECK-P8-NEXT: vmrgew v3, v5, v4 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, 0, r3 +; CHECK-P8-NEXT: stxvd2x v2, r3, r5 +; CHECK-P8-NEXT: stxvd2x v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -164,51 +160,43 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: li r7, 16 -; CHECK-P8-NEXT: li r9, 80 -; 
CHECK-P8-NEXT: lxvd2x vs7, 0, r4 +; CHECK-P8-NEXT: li r7, 64 +; CHECK-P8-NEXT: li r8, 80 +; CHECK-P8-NEXT: lxvd2x vs8, 0, r4 ; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r8 -; CHECK-P8-NEXT: li r8, 96 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 -; CHECK-P8-NEXT: lxvd2x vs5, r4, r8 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r8 +; CHECK-P8-NEXT: li r7, 96 ; CHECK-P8-NEXT: li r8, 112 -; CHECK-P8-NEXT: lxvd2x vs4, r4, r9 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r7 +; CHECK-P8-NEXT: li r7, 16 ; CHECK-P8-NEXT: lxvd2x vs6, r4, r8 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xxswapd vs4, vs4 -; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0 -; CHECK-P8-NEXT: xxswapd vs6, vs6 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs7 -; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3 -; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3 -; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5 -; CHECK-P8-NEXT: xvcvdpuxws v2, vs8 +; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1 +; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxmrgld vs2, vs2, vs3 +; CHECK-P8-NEXT: xxmrghd vs3, vs5, vs6 ; CHECK-P8-NEXT: xvcvdpuxws v3, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5 -; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P8-NEXT: xvcvdpuxws v4, vs7 -; CHECK-P8-NEXT: xvcvdpuxws v5, vs3 -; CHECK-P8-NEXT: xvcvdpuxws v0, vs4 +; CHECK-P8-NEXT: xxmrgld vs0, vs5, vs6 +; CHECK-P8-NEXT: xvcvdpuxws v4, vs1 +; CHECK-P8-NEXT: xxmrghd vs1, vs8, vs7 +; CHECK-P8-NEXT: xvcvdpuxws v5, vs2 +; CHECK-P8-NEXT: xxmrgld vs2, vs8, vs7 +; CHECK-P8-NEXT: xvcvdpuxws v2, vs4 +; CHECK-P8-NEXT: xvcvdpuxws v0, vs3 ; CHECK-P8-NEXT: xvcvdpuxws v1, vs0 -; CHECK-P8-NEXT: xvcvdpuxws v6, vs5 -; CHECK-P8-NEXT: xvcvdpuxws v7, vs1 +; CHECK-P8-NEXT: xvcvdpuxws v6, 
vs1 +; CHECK-P8-NEXT: xvcvdpuxws v7, vs2 ; CHECK-P8-NEXT: vmrgew v2, v3, v2 ; CHECK-P8-NEXT: vmrgew v3, v5, v4 ; CHECK-P8-NEXT: vmrgew v4, v1, v0 ; CHECK-P8-NEXT: vmrgew v5, v7, v6 -; CHECK-P8-NEXT: stvx v2, r3, r7 -; CHECK-P8-NEXT: stvx v3, r3, r5 -; CHECK-P8-NEXT: stvx v4, r3, r6 -; CHECK-P8-NEXT: stvx v5, 0, r3 +; CHECK-P8-NEXT: stxvd2x v4, r3, r6 +; CHECK-P8-NEXT: stxvd2x v3, r3, r5 +; CHECK-P8-NEXT: stxvd2x v2, r3, r7 +; CHECK-P8-NEXT: stxvd2x v5, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -375,22 +363,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r5 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xxmrgld vs4, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrgld vs1, vs2, vs3 -; CHECK-P8-NEXT: xxmrghd vs2, vs2, vs3 +; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1 +; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1 +; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs2 +; CHECK-P8-NEXT: xxmrgld vs2, vs3, vs2 ; CHECK-P8-NEXT: xvcvdpsxws v2, vs4 ; CHECK-P8-NEXT: xvcvdpsxws v3, vs0 ; CHECK-P8-NEXT: xvcvdpsxws v4, vs1 ; CHECK-P8-NEXT: xvcvdpsxws v5, vs2 ; CHECK-P8-NEXT: vmrgew v2, v3, v2 ; CHECK-P8-NEXT: vmrgew v3, v5, v4 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, 0, r3 +; CHECK-P8-NEXT: stxvd2x v2, r3, r5 +; CHECK-P8-NEXT: stxvd2x v3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -444,51 +428,43 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: li r7, 16 -; CHECK-P8-NEXT: li r9, 80 -; CHECK-P8-NEXT: lxvd2x vs7, 0, r4 +; CHECK-P8-NEXT: li r7, 64 +; CHECK-P8-NEXT: li r8, 80 +; CHECK-P8-NEXT: lxvd2x vs8, 0, r4 ; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r8 -; CHECK-P8-NEXT: li r8, 96 ; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 
-; CHECK-P8-NEXT: lxvd2x vs5, r4, r8 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r8 +; CHECK-P8-NEXT: li r7, 96 ; CHECK-P8-NEXT: li r8, 112 -; CHECK-P8-NEXT: lxvd2x vs4, r4, r9 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: lxvd2x vs5, r4, r7 +; CHECK-P8-NEXT: li r7, 16 ; CHECK-P8-NEXT: lxvd2x vs6, r4, r8 -; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xxswapd vs4, vs4 -; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xxmrgld vs8, vs1, vs0 -; CHECK-P8-NEXT: xxswapd vs6, vs6 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxswapd vs1, vs7 -; CHECK-P8-NEXT: xxmrgld vs7, vs4, vs3 -; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs3 -; CHECK-P8-NEXT: xxmrgld vs4, vs6, vs5 -; CHECK-P8-NEXT: xvcvdpsxws v2, vs8 +; CHECK-P8-NEXT: xxmrghd vs4, vs0, vs1 +; CHECK-P8-NEXT: xxmrgld vs0, vs0, vs1 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xxmrgld vs2, vs2, vs3 +; CHECK-P8-NEXT: xxmrghd vs3, vs5, vs6 ; CHECK-P8-NEXT: xvcvdpsxws v3, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs6, vs5 -; CHECK-P8-NEXT: xxmrgld vs5, vs2, vs1 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P8-NEXT: xvcvdpsxws v4, vs7 -; CHECK-P8-NEXT: xvcvdpsxws v5, vs3 -; CHECK-P8-NEXT: xvcvdpsxws v0, vs4 +; CHECK-P8-NEXT: xxmrgld vs0, vs5, vs6 +; CHECK-P8-NEXT: xvcvdpsxws v4, vs1 +; CHECK-P8-NEXT: xxmrghd vs1, vs8, vs7 +; CHECK-P8-NEXT: xvcvdpsxws v5, vs2 +; CHECK-P8-NEXT: xxmrgld vs2, vs8, vs7 +; CHECK-P8-NEXT: xvcvdpsxws v2, vs4 +; CHECK-P8-NEXT: xvcvdpsxws v0, vs3 ; CHECK-P8-NEXT: xvcvdpsxws v1, vs0 -; CHECK-P8-NEXT: xvcvdpsxws v6, vs5 -; CHECK-P8-NEXT: xvcvdpsxws v7, vs1 +; CHECK-P8-NEXT: xvcvdpsxws v6, vs1 +; CHECK-P8-NEXT: xvcvdpsxws v7, vs2 ; CHECK-P8-NEXT: vmrgew v2, v3, v2 ; CHECK-P8-NEXT: vmrgew v3, v5, v4 ; CHECK-P8-NEXT: vmrgew v4, v1, v0 ; CHECK-P8-NEXT: vmrgew v5, v7, v6 -; CHECK-P8-NEXT: stvx v2, r3, r7 -; CHECK-P8-NEXT: stvx v3, r3, r5 -; CHECK-P8-NEXT: stvx v4, r3, r6 -; 
CHECK-P8-NEXT: stvx v5, 0, r3 +; CHECK-P8-NEXT: stxvd2x v4, r3, r6 +; CHECK-P8-NEXT: stxvd2x v3, r3, r5 +; CHECK-P8-NEXT: stxvd2x v2, r3, r7 +; CHECK-P8-NEXT: stxvd2x v5, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll @@ -64,12 +64,12 @@ ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: xvcvspuxws v3, v3 -; CHECK-P8-NEXT: xvcvspuxws v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: xvcvspuxws vs1, vs1 +; CHECK-P8-NEXT: xvcvspuxws vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -104,18 +104,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: xvcvspuxws v5, v5 -; CHECK-P8-NEXT: xvcvspuxws v2, v2 -; CHECK-P8-NEXT: xvcvspuxws v3, v3 -; CHECK-P8-NEXT: xvcvspuxws v4, v4 -; CHECK-P8-NEXT: stvx v5, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, r3, r6 -; CHECK-P8-NEXT: stvx v4, r3, r7 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: xvcvspuxws vs3, vs3 +; CHECK-P8-NEXT: xvcvspuxws vs0, vs0 +; CHECK-P8-NEXT: xvcvspuxws vs1, vs1 +; CHECK-P8-NEXT: xvcvspuxws vs2, vs2 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, 
r5 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -211,12 +211,12 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: xvcvspsxws v3, v3 -; CHECK-P8-NEXT: xvcvspsxws v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: xvcvspsxws vs1, vs1 +; CHECK-P8-NEXT: xvcvspsxws vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -251,18 +251,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: xvcvspsxws v5, v5 -; CHECK-P8-NEXT: xvcvspsxws v2, v2 -; CHECK-P8-NEXT: xvcvspsxws v3, v3 -; CHECK-P8-NEXT: xvcvspsxws v4, v4 -; CHECK-P8-NEXT: stvx v5, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, r3, r6 -; CHECK-P8-NEXT: stvx v4, r3, r7 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: xvcvspsxws vs3, vs3 +; CHECK-P8-NEXT: xvcvspsxws vs0, vs0 +; CHECK-P8-NEXT: xvcvspsxws vs1, vs1 +; CHECK-P8-NEXT: xvcvspsxws vs2, vs2 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -101,10 +101,12 @@ ; CHECK-P8-NEXT: 
li r4, 16 ; CHECK-P8-NEXT: vmrglh v4, v3, v2 ; CHECK-P8-NEXT: vmrghh v2, v3, v2 -; CHECK-P8-NEXT: xvcvuxwsp v3, v4 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r4 +; CHECK-P8-NEXT: xvcvuxwsp vs0, v4 +; CHECK-P8-NEXT: xvcvuxwsp vs1, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -138,29 +140,37 @@ ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-P8-NEXT: addis r6, r2, .LCPI3_1@toc@ha -; CHECK-P8-NEXT: xxlxor v3, v3, v3 -; CHECK-P8-NEXT: lvx v4, 0, r4 +; CHECK-P8-NEXT: li r6, 16 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI3_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v0, 0, r6 -; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v5, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha +; CHECK-P8-NEXT: addi r4, r5, .LCPI3_1@toc@l +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: vperm v1, v3, v4, v2 -; CHECK-P8-NEXT: vperm v2, v3, v5, v2 -; CHECK-P8-NEXT: vperm v5, v3, v5, v0 -; CHECK-P8-NEXT: vperm v3, v3, v4, v0 -; CHECK-P8-NEXT: xvcvuxwsp v4, v1 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: xvcvuxwsp v5, v5 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: stvx v4, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r6 -; CHECK-P8-NEXT: stvx v5, r3, r4 -; CHECK-P8-NEXT: stvx v3, r3, r5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: vperm v1, v4, v2, v3 +; CHECK-P8-NEXT: vperm v3, v4, v5, v3 +; CHECK-P8-NEXT: vperm 
v5, v4, v5, v0 +; CHECK-P8-NEXT: vperm v2, v4, v2, v0 +; CHECK-P8-NEXT: xvcvuxwsp vs0, v1 +; CHECK-P8-NEXT: xvcvuxwsp vs1, v3 +; CHECK-P8-NEXT: xvcvuxwsp vs2, v5 +; CHECK-P8-NEXT: xvcvuxwsp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -317,10 +327,12 @@ ; CHECK-P8-NEXT: vslw v2, v2, v3 ; CHECK-P8-NEXT: vsraw v4, v4, v3 ; CHECK-P8-NEXT: vsraw v2, v2, v3 -; CHECK-P8-NEXT: xvcvsxwsp v3, v4 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r4 +; CHECK-P8-NEXT: xvcvsxwsp vs0, v4 +; CHECK-P8-NEXT: xvcvsxwsp vs1, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -356,16 +368,18 @@ ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: vspltisw v5, 8 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v3, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: vadduwm v5, v5, v5 +; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglh v4, v2, v2 ; CHECK-P8-NEXT: vmrglh v0, v3, v3 ; CHECK-P8-NEXT: vmrghh v3, v3, v3 ; CHECK-P8-NEXT: vmrghh v2, v2, v2 -; CHECK-P8-NEXT: vadduwm v5, v5, v5 ; CHECK-P8-NEXT: vslw v4, v4, v5 ; CHECK-P8-NEXT: vslw v0, v0, v5 ; CHECK-P8-NEXT: vslw v3, v3, v5 @@ -374,14 +388,18 @@ ; CHECK-P8-NEXT: vsraw v0, v0, v5 ; CHECK-P8-NEXT: vsraw v3, v3, v5 ; CHECK-P8-NEXT: vsraw v2, v2, v5 -; CHECK-P8-NEXT: xvcvsxwsp v4, v4 -; CHECK-P8-NEXT: xvcvsxwsp v5, v0 -; 
CHECK-P8-NEXT: xvcvsxwsp v3, v3 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: stvx v4, 0, r3 -; CHECK-P8-NEXT: stvx v5, r3, r6 -; CHECK-P8-NEXT: stvx v3, r3, r4 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: xvcvsxwsp vs0, v4 +; CHECK-P8-NEXT: xvcvsxwsp vs1, v0 +; CHECK-P8-NEXT: xvcvsxwsp vs2, v3 +; CHECK-P8-NEXT: xvcvsxwsp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs3, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -16,7 +16,8 @@ ; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxddp v2, v2 ; CHECK-P8-NEXT: blr @@ -53,15 +54,17 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha -; CHECK-P8-NEXT: mtvsrd v2, r4 +; CHECK-P8-NEXT: xxlxor v2, v2, v2 ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l -; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: mtvsrd v4, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: vperm v2, v4, v2, v5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v5, vs1 +; 
CHECK-P8-NEXT: vperm v3, v2, v4, v3 +; CHECK-P8-NEXT: vperm v2, v2, v4, v5 ; CHECK-P8-NEXT: xvcvuxddp vs0, v3 ; CHECK-P8-NEXT: xvcvuxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 @@ -120,16 +123,20 @@ ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_2@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 -; CHECK-P8-NEXT: lvx v1, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v5, vs1 +; CHECK-P8-NEXT: xxswapd v0, vs2 +; CHECK-P8-NEXT: xxswapd v1, vs3 ; CHECK-P8-NEXT: vperm v3, v4, v2, v3 ; CHECK-P8-NEXT: vperm v5, v4, v2, v5 ; CHECK-P8-NEXT: vperm v0, v4, v2, v0 @@ -215,38 +222,44 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r4 -; CHECK-P8-NEXT: xxlxor v3, v3, v3 -; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l +; CHECK-P8-NEXT: addis r6, r2, .LCPI3_2@toc@ha +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r6 -; CHECK-P8-NEXT: li r6, 16 -; CHECK-P8-NEXT: lvx v2, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha -; CHECK-P8-NEXT: lvx v0, r4, r6 -; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha -; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l 
-; CHECK-P8-NEXT: lvx v1, 0, r5 -; CHECK-P8-NEXT: li r5, 96 -; CHECK-P8-NEXT: lvx v8, 0, r4 -; CHECK-P8-NEXT: vperm v6, v3, v4, v2 +; CHECK-P8-NEXT: addi r6, r6, .LCPI3_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r6 +; CHECK-P8-NEXT: addis r6, r2, .LCPI3_1@toc@ha +; CHECK-P8-NEXT: lxvd2x vs3, r4, r5 +; CHECK-P8-NEXT: addi r4, r6, .LCPI3_1@toc@l +; CHECK-P8-NEXT: addis r6, r2, .LCPI3_3@toc@ha +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: lxvd2x vs4, 0, r4 +; CHECK-P8-NEXT: addi r4, r6, .LCPI3_3@toc@l +; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: vperm v7, v3, v4, v5 -; CHECK-P8-NEXT: vperm v2, v3, v0, v2 -; CHECK-P8-NEXT: vperm v9, v3, v0, v1 -; CHECK-P8-NEXT: vperm v5, v3, v0, v5 -; CHECK-P8-NEXT: vperm v0, v3, v0, v8 -; CHECK-P8-NEXT: vperm v1, v3, v4, v1 -; CHECK-P8-NEXT: vperm v3, v3, v4, v8 -; CHECK-P8-NEXT: xvcvuxddp vs1, v2 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: xxswapd v1, vs4 +; CHECK-P8-NEXT: xxswapd v8, vs0 +; CHECK-P8-NEXT: vperm v6, v4, v2, v3 +; CHECK-P8-NEXT: vperm v7, v4, v2, v5 +; CHECK-P8-NEXT: vperm v3, v4, v0, v3 +; CHECK-P8-NEXT: vperm v9, v4, v0, v1 +; CHECK-P8-NEXT: vperm v5, v4, v0, v5 +; CHECK-P8-NEXT: vperm v0, v4, v0, v8 +; CHECK-P8-NEXT: vperm v1, v4, v2, v1 +; CHECK-P8-NEXT: vperm v2, v4, v2, v8 +; CHECK-P8-NEXT: xvcvuxddp vs1, v3 ; CHECK-P8-NEXT: xvcvuxddp vs4, v9 ; CHECK-P8-NEXT: xvcvuxddp vs2, v5 ; CHECK-P8-NEXT: xvcvuxddp vs3, v0 ; CHECK-P8-NEXT: xvcvuxddp vs0, v7 -; CHECK-P8-NEXT: xvcvuxddp vs5, v3 +; CHECK-P8-NEXT: xvcvuxddp vs5, v2 ; CHECK-P8-NEXT: xvcvuxddp vs6, v6 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xvcvuxddp vs7, v1 @@ -256,18 +269,18 @@ ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs5, vs5 ; CHECK-P8-NEXT: stxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 +; CHECK-P8-NEXT: stxvd2x 
vs2, r3, r6 ; CHECK-P8-NEXT: li r4, 80 -; CHECK-P8-NEXT: li r5, 64 +; CHECK-P8-NEXT: li r6, 64 ; CHECK-P8-NEXT: xxswapd vs2, vs7 ; CHECK-P8-NEXT: xxswapd vs3, vs6 ; CHECK-P8-NEXT: stxvd2x vs4, r3, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 -; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: stxvd2x vs5, r3, r4 -; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 -; CHECK-P8-NEXT: stxvd2x vs2, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; @@ -371,7 +384,8 @@ ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: xxswapd v3, vs0 @@ -416,11 +430,13 @@ ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, v3, v3, v4 ; CHECK-P8-NEXT: xxswapd v4, vs0 @@ -483,23 +499,27 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r5, r2, .LCPI6_2@toc@ha ; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-P8-NEXT: addis r6, r2, .LCPI6_3@toc@ha -; CHECK-P8-NEXT: addi r5, r5, .LCPI6_2@toc@l +; CHECK-P8-NEXT: addis r5, r2, 
.LCPI6_2@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI6_3@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: addi r5, r5, .LCPI6_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI6_3@toc@ha +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_4@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v5, 0, r6 -; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI6_3@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_4@toc@l +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha +; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r5 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: xxswapd v0, vs3 ; CHECK-P8-NEXT: vperm v3, v2, v2, v3 ; CHECK-P8-NEXT: vperm v4, v2, v2, v4 ; CHECK-P8-NEXT: vperm v5, v2, v2, v5 @@ -602,63 +622,69 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI7_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI7_2@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: addi r5, r5, .LCPI7_0@toc@l ; CHECK-P8-NEXT: addi r6, r6, .LCPI7_2@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI7_3@toc@ha -; CHECK-P8-NEXT: lvx v3, 0, r6 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r6 ; CHECK-P8-NEXT: addis r6, r2, .LCPI7_4@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI7_3@toc@l ; CHECK-P8-NEXT: addi r6, r6, .LCPI7_4@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r5 -; CHECK-P8-NEXT: lvx v0, 0, r6 -; CHECK-P8-NEXT: li r6, 16 -; CHECK-P8-NEXT: addis r5, r2, .LCPI7_1@toc@ha -; CHECK-P8-NEXT: lvx v7, r4, r6 -; CHECK-P8-NEXT: addi r5, r5, .LCPI7_1@toc@l -; CHECK-P8-NEXT: vperm v1, v4, v4, 
v2 +; CHECK-P8-NEXT: xxswapd v2, vs1 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs4, 0, r6 +; CHECK-P8-NEXT: li r6, 96 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: xxswapd v4, vs2 +; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l +; CHECK-P8-NEXT: xxswapd v5, vs3 +; CHECK-P8-NEXT: xxswapd v0, vs4 +; CHECK-P8-NEXT: xxswapd v6, vs0 +; CHECK-P8-NEXT: vperm v1, v2, v2, v3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: vperm v6, v4, v4, v3 -; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 -; CHECK-P8-NEXT: li r5, 96 -; CHECK-P8-NEXT: vperm v8, v4, v4, v5 -; CHECK-P8-NEXT: vperm v4, v4, v4, v0 -; CHECK-P8-NEXT: vperm v5, v7, v7, v5 +; CHECK-P8-NEXT: vperm v7, v2, v2, v4 +; CHECK-P8-NEXT: vperm v8, v2, v2, v5 +; CHECK-P8-NEXT: vperm v2, v2, v2, v0 ; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: vperm v0, v7, v7, v0 -; CHECK-P8-NEXT: vperm v2, v7, v7, v2 -; CHECK-P8-NEXT: vperm v3, v7, v7, v3 +; CHECK-P8-NEXT: vperm v5, v6, v6, v5 +; CHECK-P8-NEXT: vperm v0, v6, v6, v0 +; CHECK-P8-NEXT: vperm v3, v6, v6, v3 +; CHECK-P8-NEXT: vperm v4, v6, v6, v4 ; CHECK-P8-NEXT: vsld v1, v1, v9 -; CHECK-P8-NEXT: vsld v6, v6, v9 +; CHECK-P8-NEXT: vsld v6, v7, v9 ; CHECK-P8-NEXT: vsld v5, v5, v9 ; CHECK-P8-NEXT: vsld v0, v0, v9 -; CHECK-P8-NEXT: vsld v2, v2, v9 ; CHECK-P8-NEXT: vsld v3, v3, v9 +; CHECK-P8-NEXT: vsld v4, v4, v9 ; CHECK-P8-NEXT: vsrad v5, v5, v9 ; CHECK-P8-NEXT: vsrad v0, v0, v9 ; CHECK-P8-NEXT: vsld v7, v8, v9 -; CHECK-P8-NEXT: vsld v4, v4, v9 -; CHECK-P8-NEXT: vsrad v2, v2, v9 +; CHECK-P8-NEXT: vsld v2, v2, v9 ; CHECK-P8-NEXT: vsrad v3, v3, v9 +; CHECK-P8-NEXT: vsrad v4, v4, v9 ; CHECK-P8-NEXT: xvcvsxddp vs2, v5 ; CHECK-P8-NEXT: xvcvsxddp vs3, v0 ; CHECK-P8-NEXT: vsrad v1, v1, v9 ; CHECK-P8-NEXT: vsrad v6, v6, v9 ; CHECK-P8-NEXT: vsrad v7, v7, v9 -; CHECK-P8-NEXT: vsrad v4, v4, v9 -; CHECK-P8-NEXT: xvcvsxddp vs1, v2 +; 
CHECK-P8-NEXT: vsrad v2, v2, v9 +; CHECK-P8-NEXT: xvcvsxddp vs1, v3 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xvcvsxddp vs4, v3 +; CHECK-P8-NEXT: xvcvsxddp vs4, v4 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xvcvsxddp vs0, v7 -; CHECK-P8-NEXT: xvcvsxddp vs5, v4 +; CHECK-P8-NEXT: xvcvsxddp vs5, v2 ; CHECK-P8-NEXT: xvcvsxddp vs6, v1 ; CHECK-P8-NEXT: stxvd2x vs3, r3, r4 ; CHECK-P8-NEXT: li r4, 80 ; CHECK-P8-NEXT: xvcvsxddp vs7, v6 -; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 -; CHECK-P8-NEXT: li r5, 64 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r6 +; CHECK-P8-NEXT: li r6, 64 ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xxswapd vs4, vs4 ; CHECK-P8-NEXT: xxswapd vs0, vs0 @@ -667,11 +693,11 @@ ; CHECK-P8-NEXT: stxvd2x vs4, r3, r4 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: xxswapd vs2, vs7 -; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 -; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: stxvd2x vs5, r3, r4 -; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 -; CHECK-P8-NEXT: stxvd2x vs2, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll @@ -81,10 +81,12 @@ ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw v5, v3, v3 ; CHECK-P8-NEXT: xxmrghw v3, v3, v3 ; CHECK-P8-NEXT: xxmrglw v4, v2, v2 @@ -152,25 +154,29 @@ ; CHECK-P8-NEXT: li r6, 48 ; CHECK-P8-NEXT: li r7, 32 ; CHECK-P8-NEXT: li 
r8, 64 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v0, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: li r4, 112 ; CHECK-P8-NEXT: xxmrglw v4, v2, v2 -; CHECK-P8-NEXT: xxmrghw v5, v3, v3 +; CHECK-P8-NEXT: xxmrghw v0, v3, v3 +; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: xxmrghw v2, v2, v2 ; CHECK-P8-NEXT: xxmrglw v3, v3, v3 ; CHECK-P8-NEXT: xvcvuxwdp vs0, v4 -; CHECK-P8-NEXT: lvx v4, 0, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xvcvuxwdp vs1, v5 -; CHECK-P8-NEXT: xxmrghw v5, v0, v0 -; CHECK-P8-NEXT: xxmrglw v0, v0, v0 +; CHECK-P8-NEXT: xxswapd v4, vs1 +; CHECK-P8-NEXT: xvcvuxwdp vs1, v0 +; CHECK-P8-NEXT: xxmrghw v0, v5, v5 +; CHECK-P8-NEXT: xxmrglw v5, v5, v5 ; CHECK-P8-NEXT: xvcvuxwdp vs2, v2 ; CHECK-P8-NEXT: xxmrglw v2, v4, v4 ; CHECK-P8-NEXT: xvcvuxwdp vs3, v3 ; CHECK-P8-NEXT: xxmrghw v3, v4, v4 -; CHECK-P8-NEXT: xvcvuxwdp vs4, v5 -; CHECK-P8-NEXT: xvcvuxwdp vs5, v0 +; CHECK-P8-NEXT: xvcvuxwdp vs4, v0 +; CHECK-P8-NEXT: xvcvuxwdp vs5, v5 ; CHECK-P8-NEXT: xvcvuxwdp vs6, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xvcvuxwdp vs7, v3 @@ -335,10 +341,12 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw v5, v3, v3 ; CHECK-P8-NEXT: xxmrghw v3, v3, v3 ; CHECK-P8-NEXT: xxmrglw v4, v2, v2 @@ -406,25 +414,29 @@ ; CHECK-P8-NEXT: li r6, 48 ; CHECK-P8-NEXT: li r7, 32 ; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v0, r4, r7 +; 
CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v3, vs1 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: li r4, 112 ; CHECK-P8-NEXT: xxmrglw v4, v2, v2 -; CHECK-P8-NEXT: xxmrghw v5, v3, v3 +; CHECK-P8-NEXT: xxmrghw v0, v3, v3 +; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: xxmrghw v2, v2, v2 ; CHECK-P8-NEXT: xxmrglw v3, v3, v3 ; CHECK-P8-NEXT: xvcvsxwdp vs0, v4 -; CHECK-P8-NEXT: lvx v4, 0, r4 -; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xvcvsxwdp vs1, v5 -; CHECK-P8-NEXT: xxmrghw v5, v0, v0 -; CHECK-P8-NEXT: xxmrglw v0, v0, v0 +; CHECK-P8-NEXT: xxswapd v4, vs1 +; CHECK-P8-NEXT: xvcvsxwdp vs1, v0 +; CHECK-P8-NEXT: xxmrghw v0, v5, v5 +; CHECK-P8-NEXT: xxmrglw v5, v5, v5 ; CHECK-P8-NEXT: xvcvsxwdp vs2, v2 ; CHECK-P8-NEXT: xxmrglw v2, v4, v4 ; CHECK-P8-NEXT: xvcvsxwdp vs3, v3 ; CHECK-P8-NEXT: xxmrghw v3, v4, v4 -; CHECK-P8-NEXT: xvcvsxwdp vs4, v5 -; CHECK-P8-NEXT: xvcvsxwdp vs5, v0 +; CHECK-P8-NEXT: xvcvsxwdp vs4, v0 +; CHECK-P8-NEXT: xvcvsxwdp vs5, v5 ; CHECK-P8-NEXT: xvcvsxwdp vs6, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xvcvsxwdp vs7, v3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -115,8 +115,10 @@ ; CHECK-P8-NEXT: xxsldwi v4, vs2, vs2, 3 ; CHECK-P8-NEXT: vpkudum v2, v3, v2 ; CHECK-P8-NEXT: vpkudum v3, v4, v5 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, 0, r3 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -211,10 +213,14 @@ ; CHECK-P8-NEXT: xxsldwi v7, vs7, vs7, 3 ; CHECK-P8-NEXT: vpkudum v4, v1, v0 ; CHECK-P8-NEXT: vpkudum v5, v6, v7 -; CHECK-P8-NEXT: stvx 
v2, r3, r7 -; CHECK-P8-NEXT: stvx v3, r3, r5 -; CHECK-P8-NEXT: stvx v4, r3, r6 -; CHECK-P8-NEXT: stvx v5, 0, r3 +; CHECK-P8-NEXT: xxswapd vs2, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: xxswapd vs0, v4 +; CHECK-P8-NEXT: xxswapd vs3, v5 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -401,8 +407,10 @@ ; CHECK-P8-NEXT: xxsldwi v4, vs2, vs2, 3 ; CHECK-P8-NEXT: vpkudum v2, v3, v2 ; CHECK-P8-NEXT: vpkudum v3, v4, v5 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, 0, r3 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -497,10 +505,14 @@ ; CHECK-P8-NEXT: xxsldwi v7, vs7, vs7, 3 ; CHECK-P8-NEXT: vpkudum v4, v1, v0 ; CHECK-P8-NEXT: vpkudum v5, v6, v7 -; CHECK-P8-NEXT: stvx v2, r3, r7 -; CHECK-P8-NEXT: stvx v3, r3, r5 -; CHECK-P8-NEXT: stvx v4, r3, r6 -; CHECK-P8-NEXT: stvx v5, 0, r3 +; CHECK-P8-NEXT: xxswapd vs2, v2 +; CHECK-P8-NEXT: xxswapd vs1, v3 +; CHECK-P8-NEXT: xxswapd vs0, v4 +; CHECK-P8-NEXT: xxswapd vs3, v5 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -68,7 +68,8 @@ ; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: 
xvcvuxwsp v2, v2 ; CHECK-P8-NEXT: blr @@ -105,19 +106,23 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha -; CHECK-P8-NEXT: mtvsrd v2, r4 +; CHECK-P8-NEXT: xxlxor v2, v2, v2 ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-P8-NEXT: addi r4, r6, .LCPI2_1@toc@l -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: mtvsrd v4, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: vperm v2, v4, v2, v5 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI2_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v5, vs1 +; CHECK-P8-NEXT: vperm v3, v2, v4, v3 +; CHECK-P8-NEXT: vperm v2, v2, v4, v5 +; CHECK-P8-NEXT: xvcvuxwsp vs0, v3 +; CHECK-P8-NEXT: xvcvuxwsp vs1, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -170,29 +175,37 @@ ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_2@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI3_3@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_3@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 -; CHECK-P8-NEXT: lvx v1, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: vperm v5, v4, v2, v5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: 
xxswapd v5, vs1 +; CHECK-P8-NEXT: xxswapd v0, vs2 +; CHECK-P8-NEXT: xxswapd v1, vs3 ; CHECK-P8-NEXT: vperm v3, v4, v2, v3 +; CHECK-P8-NEXT: vperm v5, v4, v2, v5 ; CHECK-P8-NEXT: vperm v0, v4, v2, v0 ; CHECK-P8-NEXT: vperm v2, v4, v2, v1 -; CHECK-P8-NEXT: xvcvuxwsp v4, v5 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: xvcvuxwsp v5, v0 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: stvx v4, r3, r5 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v5, r3, r4 +; CHECK-P8-NEXT: xvcvuxwsp vs0, v3 +; CHECK-P8-NEXT: xvcvuxwsp vs1, v5 +; CHECK-P8-NEXT: xvcvuxwsp vs2, v0 +; CHECK-P8-NEXT: xvcvuxwsp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: stvx v2, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -320,7 +333,8 @@ ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha ; CHECK-P8-NEXT: mtvsrwz v3, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vspltisw v3, 12 ; CHECK-P8-NEXT: vadduwm v3, v3, v3 @@ -362,23 +376,27 @@ ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI6_1@toc@ha ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: vspltisw v5, 12 ; CHECK-P8-NEXT: li r4, 16 ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: addi r5, r6, .LCPI6_1@toc@l -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, v3, v3, v4 -; CHECK-P8-NEXT: vadduwm v4, v5, v5 +; 
CHECK-P8-NEXT: vspltisw v4, 12 +; CHECK-P8-NEXT: vadduwm v4, v4, v4 ; CHECK-P8-NEXT: vslw v2, v2, v4 ; CHECK-P8-NEXT: vslw v3, v3, v4 ; CHECK-P8-NEXT: vsraw v2, v2, v4 ; CHECK-P8-NEXT: vsraw v3, v3, v4 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: xvcvsxwsp v3, v3 -; CHECK-P8-NEXT: stvx v2, 0, r3 -; CHECK-P8-NEXT: stvx v3, r3, r4 +; CHECK-P8-NEXT: xvcvsxwsp vs0, v2 +; CHECK-P8-NEXT: xvcvsxwsp vs1, v3 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -433,16 +451,20 @@ ; CHECK-P8-NEXT: vspltisw v1, 12 ; CHECK-P8-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI7_2@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI7_3@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI7_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI7_3@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI7_1@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: lvx v0, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v4, vs1 +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: xxswapd v0, vs3 ; CHECK-P8-NEXT: vperm v3, v2, v2, v3 ; CHECK-P8-NEXT: vperm v4, v2, v2, v4 ; CHECK-P8-NEXT: vperm v5, v2, v2, v5 @@ -456,15 +478,19 @@ ; CHECK-P8-NEXT: vsraw v4, v4, v0 ; CHECK-P8-NEXT: vsraw v5, v5, v0 ; CHECK-P8-NEXT: vsraw v2, v2, v0 -; CHECK-P8-NEXT: xvcvsxwsp v3, v3 -; CHECK-P8-NEXT: xvcvsxwsp v4, v4 -; CHECK-P8-NEXT: xvcvsxwsp v5, v5 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v4, r3, r5 -; CHECK-P8-NEXT: stvx v5, r3, r4 +; CHECK-P8-NEXT: xvcvsxwsp vs0, v3 +; CHECK-P8-NEXT: xvcvsxwsp vs1, v4 +; CHECK-P8-NEXT: xvcvsxwsp vs2, v5 +; 
CHECK-P8-NEXT: xvcvsxwsp vs3, v2 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xxswapd vs3, vs3 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: stvx v2, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, r3, r4 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -16,7 +16,8 @@ ; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-P8-NEXT: xvcvuxddp v2, v2 ; CHECK-P8-NEXT: blr @@ -53,15 +54,17 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI1_1@toc@ha -; CHECK-P8-NEXT: mtvsrwz v2, r4 +; CHECK-P8-NEXT: xxlxor v2, v2, v2 ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l -; CHECK-P8-NEXT: addi r4, r6, .LCPI1_1@toc@l -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: lvx v5, 0, r4 +; CHECK-P8-NEXT: mtvsrwz v4, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: vperm v2, v4, v2, v5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI1_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v5, vs1 +; CHECK-P8-NEXT: vperm v3, v2, v4, v3 +; CHECK-P8-NEXT: vperm v2, v2, v4, v5 ; CHECK-P8-NEXT: xvcvuxddp vs0, v3 ; CHECK-P8-NEXT: xvcvuxddp vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 @@ -117,31 +120,35 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis 
r5, r2, .LCPI2_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI2_2@toc@ha -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_3@toc@ha +; CHECK-P8-NEXT: xxlxor v2, v2, v2 ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI2_3@toc@l -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: addi r5, r6, .LCPI2_2@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 +; CHECK-P8-NEXT: addi r6, r6, .LCPI2_2@toc@l +; CHECK-P8-NEXT: mtvsrd v4, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v5, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI2_1@toc@ha -; CHECK-P8-NEXT: addi r5, r5, .LCPI2_1@toc@l -; CHECK-P8-NEXT: lvx v1, 0, r5 -; CHECK-P8-NEXT: vperm v0, v4, v2, v0 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI2_3@toc@ha +; CHECK-P8-NEXT: lxvd2x vs1, 0, r6 +; CHECK-P8-NEXT: addis r6, r2, .LCPI2_1@toc@ha +; CHECK-P8-NEXT: addi r5, r5, .LCPI2_3@toc@l +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 +; CHECK-P8-NEXT: addi r5, r6, .LCPI2_1@toc@l +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v5, vs1 ; CHECK-P8-NEXT: li r5, 32 -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: vperm v5, v4, v2, v5 -; CHECK-P8-NEXT: vperm v2, v4, v2, v1 -; CHECK-P8-NEXT: xvcvuxddp vs2, v0 +; CHECK-P8-NEXT: xxswapd v0, vs2 +; CHECK-P8-NEXT: xxswapd v1, vs3 +; CHECK-P8-NEXT: vperm v3, v2, v4, v3 +; CHECK-P8-NEXT: vperm v5, v2, v4, v5 +; CHECK-P8-NEXT: vperm v0, v2, v4, v0 +; CHECK-P8-NEXT: vperm v2, v2, v4, v1 ; CHECK-P8-NEXT: xvcvuxddp vs0, v3 ; CHECK-P8-NEXT: xvcvuxddp vs1, v5 +; CHECK-P8-NEXT: xvcvuxddp vs2, v0 ; CHECK-P8-NEXT: xvcvuxddp vs3, v2 -; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 ; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xxswapd vs2, vs2 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: li r4, 16 @@ -224,58 +231,66 @@ ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: addi r4, r4, 
.LCPI3_0@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_1@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI3_2@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_4@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_2@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_4@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI3_6@toc@ha -; CHECK-P8-NEXT: lvx v1, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_7@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_6@toc@l +; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_7@toc@l -; CHECK-P8-NEXT: vperm v3, v4, v2, v3 -; CHECK-P8-NEXT: lvx v6, 0, r4 +; CHECK-P8-NEXT: xxswapd v5, vs1 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI3_5@toc@ha -; CHECK-P8-NEXT: lvx v7, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI3_3@toc@ha -; CHECK-P8-NEXT: vperm v5, v4, v2, v5 ; CHECK-P8-NEXT: addi r4, r4, .LCPI3_5@toc@l +; CHECK-P8-NEXT: xxswapd v0, vs2 ; CHECK-P8-NEXT: addi r5, r5, .LCPI3_3@toc@l -; CHECK-P8-NEXT: vperm v0, v4, v2, v0 -; CHECK-P8-NEXT: lvx v8, 0, r4 -; CHECK-P8-NEXT: lvx v9, 0, r5 -; CHECK-P8-NEXT: vperm v1, v4, v2, v1 +; CHECK-P8-NEXT: xxswapd v1, vs3 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: vperm v3, v4, v2, v3 ; CHECK-P8-NEXT: li r4, 112 ; CHECK-P8-NEXT: li r5, 96 +; CHECK-P8-NEXT: xxswapd v6, vs0 +; CHECK-P8-NEXT: xxswapd v7, vs1 +; CHECK-P8-NEXT: vperm v5, v4, v2, v5 +; CHECK-P8-NEXT: xxswapd v8, vs2 +; CHECK-P8-NEXT: xxswapd v9, vs3 ; CHECK-P8-NEXT: vperm v6, v4, v2, v6 ; CHECK-P8-NEXT: vperm v7, v4, v2, v7 ; CHECK-P8-NEXT: vperm v8, v4, v2, v8 +; CHECK-P8-NEXT: vperm v0, v4, v2, v0 +; CHECK-P8-NEXT: vperm v1, v4, v2, v1 ; CHECK-P8-NEXT: vperm v2, v4, v2, v9 -; CHECK-P8-NEXT: xvcvuxddp vs0, v0 -; 
CHECK-P8-NEXT: xvcvuxddp vs1, v1 ; CHECK-P8-NEXT: xvcvuxddp vs2, v6 ; CHECK-P8-NEXT: xvcvuxddp vs3, v7 ; CHECK-P8-NEXT: xvcvuxddp vs4, v8 +; CHECK-P8-NEXT: xvcvuxddp vs0, v0 +; CHECK-P8-NEXT: xvcvuxddp vs1, v1 ; CHECK-P8-NEXT: xvcvuxddp vs5, v2 ; CHECK-P8-NEXT: xvcvuxddp vs6, v3 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xvcvuxddp vs7, v5 -; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xxswapd vs2, vs2 +; CHECK-P8-NEXT: xvcvuxddp vs7, v5 ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xxswapd vs4, vs4 -; CHECK-P8-NEXT: xxswapd vs5, vs5 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: stxvd2x vs3, r3, r4 -; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: li r4, 80 +; CHECK-P8-NEXT: xxswapd vs5, vs5 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r5 ; CHECK-P8-NEXT: li r5, 64 -; CHECK-P8-NEXT: xxswapd vs2, vs7 ; CHECK-P8-NEXT: xxswapd vs3, vs6 ; CHECK-P8-NEXT: stxvd2x vs4, r3, r4 ; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd vs2, vs7 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r5 ; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: stxvd2x vs5, r3, r4 @@ -404,7 +419,8 @@ ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: xxswapd v3, vs0 @@ -449,11 +465,13 @@ ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v4, 0, r5 +; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v3, 
v3, v3, v4 ; CHECK-P8-NEXT: xxswapd v4, vs0 @@ -519,22 +537,26 @@ ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI6_2@toc@ha ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l ; CHECK-P8-NEXT: addi r6, r6, .LCPI6_2@toc@l -; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-P8-NEXT: lvx v2, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r5 ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_3@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r6 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r6 ; CHECK-P8-NEXT: addis r6, r2, .LCPI6_4@toc@ha -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 -; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_3@toc@l -; CHECK-P8-NEXT: lvx v5, 0, r5 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 ; CHECK-P8-NEXT: addi r5, r6, .LCPI6_4@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r5 -; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: xxswapd v4, vs1 +; CHECK-P8-NEXT: addis r5, r2, .LCPI6_1@toc@ha +; CHECK-P8-NEXT: addi r4, r5, .LCPI6_1@toc@l ; CHECK-P8-NEXT: li r5, 32 +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r4, 48 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: vperm v4, v3, v3, v4 ; CHECK-P8-NEXT: vperm v5, v3, v3, v5 ; CHECK-P8-NEXT: vperm v3, v3, v3, v0 @@ -639,38 +661,46 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI7_0@toc@ha ; CHECK-P8-NEXT: addis r5, r2, .LCPI7_2@toc@ha -; CHECK-P8-NEXT: addis r6, r2, .LCPI7_3@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-P8-NEXT: addi r5, r5, .LCPI7_2@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI7_3@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI7_4@toc@ha -; CHECK-P8-NEXT: lvx v4, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI7_5@toc@ha -; CHECK-P8-NEXT: lvx v5, 0, r6 -; CHECK-P8-NEXT: addis r6, r2, .LCPI7_1@toc@ha -; CHECK-P8-NEXT: 
addi r4, r4, .LCPI7_4@toc@l -; CHECK-P8-NEXT: addi r5, r5, .LCPI7_5@toc@l -; CHECK-P8-NEXT: addi r6, r6, .LCPI7_1@toc@l -; CHECK-P8-NEXT: lvx v0, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI7_6@toc@ha -; CHECK-P8-NEXT: lvx v1, 0, r5 -; CHECK-P8-NEXT: addis r5, r2, .LCPI7_7@toc@ha -; CHECK-P8-NEXT: lxvd2x vs0, 0, r6 -; CHECK-P8-NEXT: addi r4, r4, .LCPI7_6@toc@l -; CHECK-P8-NEXT: addi r5, r5, .LCPI7_7@toc@l +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI7_3@toc@ha +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI7_4@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI7_3@toc@l +; CHECK-P8-NEXT: addi r5, r5, .LCPI7_4@toc@l +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI7_5@toc@ha +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI7_6@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI7_5@toc@l +; CHECK-P8-NEXT: addi r5, r5, .LCPI7_6@toc@l +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: xxswapd v4, vs1 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: addis r4, r2, .LCPI7_7@toc@ha +; CHECK-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-P8-NEXT: addis r5, r2, .LCPI7_8@toc@ha +; CHECK-P8-NEXT: addi r4, r4, .LCPI7_7@toc@l +; CHECK-P8-NEXT: addi r5, r5, .LCPI7_8@toc@l +; CHECK-P8-NEXT: xxswapd v5, vs2 +; CHECK-P8-NEXT: xxswapd v0, vs3 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r5 +; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha ; CHECK-P8-NEXT: vperm v3, v2, v2, v3 -; CHECK-P8-NEXT: lvx v6, 0, r4 -; CHECK-P8-NEXT: addis r4, r2, .LCPI7_8@toc@ha -; CHECK-P8-NEXT: lvx v7, 0, r5 -; CHECK-P8-NEXT: vperm v4, v2, v2, v4 ; CHECK-P8-NEXT: li r5, 96 -; CHECK-P8-NEXT: addi r4, r4, .LCPI7_8@toc@l +; CHECK-P8-NEXT: xxswapd v1, vs0 +; CHECK-P8-NEXT: xxswapd v6, vs1 +; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l +; CHECK-P8-NEXT: vperm v4, v2, v2, v4 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: xxswapd v7, vs2 +; CHECK-P8-NEXT: xxswapd v8, vs3 ; 
CHECK-P8-NEXT: vperm v5, v2, v2, v5 -; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: lvx v8, 0, r4 ; CHECK-P8-NEXT: vperm v0, v2, v2, v0 -; CHECK-P8-NEXT: li r4, 112 +; CHECK-P8-NEXT: xxswapd v9, vs0 ; CHECK-P8-NEXT: vperm v1, v2, v2, v1 ; CHECK-P8-NEXT: vperm v6, v2, v2, v6 ; CHECK-P8-NEXT: vperm v7, v2, v2, v7 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll @@ -64,12 +64,12 @@ ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1 +; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -104,18 +104,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: xvcvuxwsp v5, v5 -; CHECK-P8-NEXT: xvcvuxwsp v2, v2 -; CHECK-P8-NEXT: xvcvuxwsp v3, v3 -; CHECK-P8-NEXT: xvcvuxwsp v4, v4 -; CHECK-P8-NEXT: stvx v5, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, r3, r6 -; CHECK-P8-NEXT: stvx v4, r3, r7 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: xvcvuxwsp vs3, vs3 +; CHECK-P8-NEXT: xvcvuxwsp vs0, vs0 +; CHECK-P8-NEXT: xvcvuxwsp vs1, vs1 +; CHECK-P8-NEXT: xvcvuxwsp vs2, vs2 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; 
CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -211,12 +211,12 @@ ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v3, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: xvcvsxwsp v3, v3 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1 +; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -251,18 +251,18 @@ ; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r7, 48 -; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: lvx v2, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r6 -; CHECK-P8-NEXT: lvx v4, r4, r7 -; CHECK-P8-NEXT: xvcvsxwsp v5, v5 -; CHECK-P8-NEXT: xvcvsxwsp v2, v2 -; CHECK-P8-NEXT: xvcvsxwsp v3, v3 -; CHECK-P8-NEXT: xvcvsxwsp v4, v4 -; CHECK-P8-NEXT: stvx v5, 0, r3 -; CHECK-P8-NEXT: stvx v2, r3, r5 -; CHECK-P8-NEXT: stvx v3, r3, r6 -; CHECK-P8-NEXT: stvx v4, r3, r7 +; CHECK-P8-NEXT: lxvd2x vs3, 0, r4 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 +; CHECK-P8-NEXT: lxvd2x vs2, r4, r7 +; CHECK-P8-NEXT: xvcvsxwsp vs3, vs3 +; CHECK-P8-NEXT: xvcvsxwsp vs0, vs0 +; CHECK-P8-NEXT: xvcvsxwsp vs1, vs1 +; CHECK-P8-NEXT: xvcvsxwsp vs2, vs2 +; CHECK-P8-NEXT: stxvd2x vs2, r3, r7 +; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 +; CHECK-P8-NEXT: stxvd2x vs0, r3, r5 +; CHECK-P8-NEXT: stxvd2x vs3, 0, r3 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll --- a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll +++ 
b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll @@ -4,9 +4,11 @@ define void @VPKUDUM_unary(<2 x i64>* %A) { ; CHECK-LABEL: VPKUDUM_unary: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx 2, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 34, 0 ; CHECK-NEXT: vpkudum 2, 2, 2 -; CHECK-NEXT: stvx 2, 0, 3 +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: stxvd2x 0, 0, 3 ; CHECK-NEXT: blr entry: %tmp = load <2 x i64>, <2 x i64>* %A @@ -25,10 +27,13 @@ define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { ; CHECK-LABEL: VPKUDUM: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lvx 2, 0, 3 -; CHECK-NEXT: lvx 3, 0, 4 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: lxvd2x 1, 0, 4 +; CHECK-NEXT: xxswapd 34, 0 +; CHECK-NEXT: xxswapd 35, 1 ; CHECK-NEXT: vpkudum 2, 3, 2 -; CHECK-NEXT: stvx 2, 0, 3 +; CHECK-NEXT: xxswapd 0, 34 +; CHECK-NEXT: stxvd2x 0, 0, 3 ; CHECK-NEXT: blr entry: %tmp = load <2 x i64>, <2 x i64>* %A diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -51,7 +51,6 @@ ; PC64LE-NEXT: addi 3, 3, .LCPI2_0@toc@l ; PC64LE-NEXT: xxsldwi 4, 35, 35, 1 ; PC64LE-NEXT: xxsldwi 5, 34, 34, 1 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -64,6 +63,8 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr @@ -313,12 +314,13 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI7_0@toc@l ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded 
Reload ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 96 @@ -646,7 +648,6 @@ ; PC64LE-NEXT: addi 3, 3, .LCPI12_0@toc@l ; PC64LE-NEXT: xxsldwi 4, 35, 35, 1 ; PC64LE-NEXT: xxsldwi 5, 34, 34, 1 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -659,6 +660,8 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr @@ -807,7 +810,6 @@ ; PC64LE-NEXT: addi 3, 3, .LCPI17_0@toc@l ; PC64LE-NEXT: xxsldwi 4, 35, 35, 1 ; PC64LE-NEXT: xxsldwi 5, 34, 34, 1 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -820,6 +822,8 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr @@ -968,7 +972,6 @@ ; PC64LE-NEXT: addi 3, 3, .LCPI22_0@toc@l ; PC64LE-NEXT: xxsldwi 4, 35, 35, 1 ; PC64LE-NEXT: xxsldwi 5, 34, 34, 1 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -981,6 +984,8 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr @@ -1124,10 +1129,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI27_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI27_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; 
PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xssqrtsp 0, 0 ; PC64LE-NEXT: xssqrtsp 1, 1 ; PC64LE-NEXT: xssqrtsp 2, 2 @@ -1368,12 +1374,13 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI32_0@toc@l ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 96 @@ -1800,10 +1807,11 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI37_0@toc@l ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 96 @@ -2184,11 +2192,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI42_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -2534,11 +2543,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI47_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 
; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -2884,11 +2894,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI52_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -3234,11 +3245,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI57_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -3584,11 +3596,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI62_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -3934,11 +3947,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI67_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 
0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -4284,11 +4298,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI72_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -4554,10 +4569,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI77_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI77_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xsrdpic 0, 0 ; PC64LE-NEXT: xsrdpic 1, 1 ; PC64LE-NEXT: xsrdpic 2, 2 @@ -4772,11 +4788,12 @@ ; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI82_0@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 80 @@ -5087,12 +5104,13 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI87_0@toc@l ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 96 @@ -5328,12 
+5346,13 @@ ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI92_0@toc@l ; PC64LE-NEXT: xscvdpspn 34, 31 -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: lfd 31, 88(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 0, 1 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: addi 1, 1, 96 @@ -5544,7 +5563,6 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI97_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI97_0@toc@l -; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -5552,13 +5570,15 @@ ; PC64LE-NEXT: xscvdpsxws 1, 1 ; PC64LE-NEXT: xscvdpsxws 2, 2 ; PC64LE-NEXT: mffprwz 4, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mffprwz 5, 1 -; PC64LE-NEXT: mtfprwz 0, 4 -; PC64LE-NEXT: mtfprwz 1, 5 -; PC64LE-NEXT: mffprwz 4, 2 -; PC64LE-NEXT: xxmrghw 35, 1, 0 -; PC64LE-NEXT: mtvsrwz 36, 4 -; PC64LE-NEXT: vperm 2, 4, 3, 2 +; PC64LE-NEXT: mtfprwz 1, 4 +; PC64LE-NEXT: mtfprwz 3, 5 +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: mffprwz 3, 2 +; PC64LE-NEXT: xxmrghw 34, 3, 1 +; PC64LE-NEXT: mtvsrwz 36, 3 +; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32: @@ -5814,15 +5834,16 @@ ; PC64LE-NEXT: xscvdpsxws 1, 2 ; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l ; PC64LE-NEXT: xscvdpsxws 2, 3 -; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: mffprwz 4, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mffprwz 5, 1 -; PC64LE-NEXT: mtfprwz 0, 4 -; PC64LE-NEXT: mtfprwz 1, 5 -; PC64LE-NEXT: mffprwz 4, 2 -; PC64LE-NEXT: xxmrghw 35, 1, 0 -; PC64LE-NEXT: mtvsrwz 36, 4 -; PC64LE-NEXT: vperm 2, 4, 3, 2 +; PC64LE-NEXT: mtfprwz 1, 4 +; PC64LE-NEXT: mtfprwz 3, 5 +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: mffprwz 3, 2 +; PC64LE-NEXT: xxmrghw 34, 3, 1 +; 
PC64LE-NEXT: mtvsrwz 36, 3 +; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64: @@ -6038,7 +6059,6 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l -; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 @@ -6046,13 +6066,15 @@ ; PC64LE-NEXT: xscvdpuxws 1, 1 ; PC64LE-NEXT: xscvdpuxws 2, 2 ; PC64LE-NEXT: mffprwz 4, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mffprwz 5, 1 -; PC64LE-NEXT: mtfprwz 0, 4 -; PC64LE-NEXT: mtfprwz 1, 5 -; PC64LE-NEXT: mffprwz 4, 2 -; PC64LE-NEXT: xxmrghw 35, 1, 0 -; PC64LE-NEXT: mtvsrwz 36, 4 -; PC64LE-NEXT: vperm 2, 4, 3, 2 +; PC64LE-NEXT: mtfprwz 1, 4 +; PC64LE-NEXT: mtfprwz 3, 5 +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: mffprwz 3, 2 +; PC64LE-NEXT: xxmrghw 34, 3, 1 +; PC64LE-NEXT: mtvsrwz 36, 3 +; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32: @@ -6307,15 +6329,16 @@ ; PC64LE-NEXT: xscvdpuxws 1, 2 ; PC64LE-NEXT: addi 3, 3, .LCPI121_0@toc@l ; PC64LE-NEXT: xscvdpuxws 2, 3 -; PC64LE-NEXT: lvx 2, 0, 3 ; PC64LE-NEXT: mffprwz 4, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: mffprwz 5, 1 -; PC64LE-NEXT: mtfprwz 0, 4 -; PC64LE-NEXT: mtfprwz 1, 5 -; PC64LE-NEXT: mffprwz 4, 2 -; PC64LE-NEXT: xxmrghw 35, 1, 0 -; PC64LE-NEXT: mtvsrwz 36, 4 -; PC64LE-NEXT: vperm 2, 4, 3, 2 +; PC64LE-NEXT: mtfprwz 1, 4 +; PC64LE-NEXT: mtfprwz 3, 5 +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: mffprwz 3, 2 +; PC64LE-NEXT: xxmrghw 34, 3, 1 +; PC64LE-NEXT: mtvsrwz 36, 3 +; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64: @@ -6521,10 +6544,11 @@ ; PC64LE-NEXT: xsrsp 1, 2 ; PC64LE-NEXT: addi 3, 3, .LCPI129_0@toc@l ; PC64LE-NEXT: xsrsp 2, 3 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 34, 
2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: xxmrghw 35, 1, 0 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr @@ -6720,10 +6744,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI137_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI137_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xsrdpip 0, 0 ; PC64LE-NEXT: xsrdpip 1, 1 ; PC64LE-NEXT: xsrdpip 2, 2 @@ -6835,10 +6860,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI141_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI141_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xsrdpim 0, 0 ; PC64LE-NEXT: xsrdpim 1, 1 ; PC64LE-NEXT: xsrdpim 2, 2 @@ -6949,10 +6975,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI145_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI145_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xsrdpi 0, 0 ; PC64LE-NEXT: xsrdpi 1, 1 ; PC64LE-NEXT: xsrdpi 2, 2 @@ -7064,10 +7091,11 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI149_0@toc@ha ; PC64LE-NEXT: xxsldwi 2, 34, 34, 1 ; PC64LE-NEXT: addi 3, 3, .LCPI149_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: xscvspdpn 0, 0 ; PC64LE-NEXT: xscvspdpn 1, 1 ; PC64LE-NEXT: xscvspdpn 2, 2 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: xsrdpiz 0, 0 ; PC64LE-NEXT: xsrdpiz 1, 1 ; PC64LE-NEXT: xsrdpiz 2, 2 @@ -7221,9 +7249,10 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI155_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI155_0@toc@l -; PC64LE-NEXT: lvx 3, 0, 3 +; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: 
addis 3, 2, .LCPI155_1@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI155_1@toc@l +; PC64LE-NEXT: xxswapd 35, 0 ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: vperm 2, 2, 2, 3 ; PC64LE-NEXT: xxswapd 35, 0 @@ -7419,10 +7448,11 @@ ; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE-NEXT: addis 3, 2, .LCPI161_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI161_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: mffprwz 4, 0 ; PC64LE-NEXT: mffprwz 5, 1 ; PC64LE-NEXT: mtfprwa 0, 4 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: mtfprwa 1, 5 ; PC64LE-NEXT: mfvsrwz 4, 34 ; PC64LE-NEXT: xscvsxdsp 0, 0 @@ -7502,14 +7532,15 @@ ; PC64LE-NEXT: mtfprd 1, 4 ; PC64LE-NEXT: addi 3, 6, .LCPI163_0@toc@l ; PC64LE-NEXT: xscvsxdsp 0, 0 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvsxdsp 1, 1 ; PC64LE-NEXT: mtfprd 2, 5 ; PC64LE-NEXT: xscvsxdsp 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xxmrghw 35, 1, 0 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr ; @@ -7790,7 +7821,8 @@ ; PC64LE-NEXT: addis 3, 2, .LCPI173_0@toc@ha ; PC64LE-NEXT: xxlxor 36, 36, 36 ; PC64LE-NEXT: addi 3, 3, .LCPI173_0@toc@l -; PC64LE-NEXT: lvx 3, 0, 3 +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: xxswapd 35, 0 ; PC64LE-NEXT: vperm 2, 4, 2, 3 ; PC64LE-NEXT: xvcvuxddp 34, 34 ; PC64LE-NEXT: blr @@ -7982,10 +8014,11 @@ ; PC64LE-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE-NEXT: addis 3, 2, .LCPI179_0@toc@ha ; PC64LE-NEXT: addi 3, 3, .LCPI179_0@toc@l -; PC64LE-NEXT: lvx 4, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 3 ; PC64LE-NEXT: mffprwz 4, 0 ; PC64LE-NEXT: mffprwz 5, 1 ; PC64LE-NEXT: mtfprwz 0, 4 +; PC64LE-NEXT: xxswapd 36, 3 ; PC64LE-NEXT: mtfprwz 1, 5 ; PC64LE-NEXT: mfvsrwz 4, 34 ; PC64LE-NEXT: xscvuxdsp 0, 0 @@ -8065,14 +8098,15 @@ ; PC64LE-NEXT: mtfprd 1, 4 ; PC64LE-NEXT: addi 3, 6, .LCPI181_0@toc@l ; PC64LE-NEXT: xscvuxdsp 0, 0 -; PC64LE-NEXT: lvx 4, 0, 3 ; PC64LE-NEXT: xscvuxdsp 1, 1 ; PC64LE-NEXT: 
mtfprd 2, 5 ; PC64LE-NEXT: xscvuxdsp 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 34, 2 +; PC64LE-NEXT: lxvd2x 2, 0, 3 ; PC64LE-NEXT: xxmrghw 35, 1, 0 +; PC64LE-NEXT: xxswapd 36, 2 ; PC64LE-NEXT: vperm 2, 2, 3, 4 ; PC64LE-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vector-ldst.ll b/llvm/test/CodeGen/PowerPC/vector-ldst.ll --- a/llvm/test/CodeGen/PowerPC/vector-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/vector-ldst.ll @@ -27,7 +27,8 @@ ; ; CHECK-P8-LE-LABEL: ld_0_vector: ; CHECK-P8-LE: # %bb.0: # %entry -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_0_vector: @@ -56,7 +57,8 @@ ; CHECK-P8-LE-LABEL: ld_unalign16_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: addi r3, r3, 1 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_unalign16_vector: @@ -87,7 +89,8 @@ ; CHECK-P8-LE-LABEL: ld_align16_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: addi r3, r3, 8 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_align16_vector: @@ -120,7 +123,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r4, 1 ; CHECK-P8-LE-NEXT: ori r4, r4, 34463 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_unalign32_vector: @@ -154,7 +158,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r4, 1525 ; CHECK-P8-LE-NEXT: ori r4, r4, 56600 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_align32_vector: @@ -195,7 +200,8 @@ ; CHECK-P8-LE-NEXT: rldic r4, r4, 
35, 24 ; CHECK-P8-LE-NEXT: oris r4, r4, 54437 ; CHECK-P8-LE-NEXT: ori r4, r4, 4097 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_unalign64_vector: @@ -235,7 +241,8 @@ ; CHECK-P8-LE-NEXT: lis r4, 3725 ; CHECK-P8-LE-NEXT: ori r4, r4, 19025 ; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_align64_vector: @@ -261,7 +268,8 @@ ; ; CHECK-P8-LE-LABEL: ld_reg_vector: ; CHECK-P8-LE: # %bb.0: # %entry -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_reg_vector: @@ -286,7 +294,8 @@ ; CHECK-P8-LE-LABEL: ld_or_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: or r3, r4, r3 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_or_vector: @@ -313,7 +322,8 @@ ; CHECK-P8-LE-LABEL: ld_or2_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_or2_vector: @@ -341,7 +351,8 @@ ; CHECK-P8-LE-LABEL: ld_not_disjoint16_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: ori r3, r3, 6 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_not_disjoint16_vector: @@ -375,7 +386,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 ; CHECK-P8-LE-NEXT: ori r3, r3, 6 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; 
CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_unalign16_vector: @@ -411,7 +423,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 ; CHECK-P8-LE-NEXT: ori r3, r3, 24 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_align16_vector: @@ -441,7 +454,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: ori r3, r3, 34463 ; CHECK-P8-LE-NEXT: oris r3, r3, 1 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_not_disjoint32_vector: @@ -478,7 +492,8 @@ ; CHECK-P8-LE-NEXT: lis r4, 1 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 43 ; CHECK-P8-LE-NEXT: ori r4, r4, 34463 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_unalign32_vector: @@ -520,7 +535,8 @@ ; CHECK-P8-LE-NEXT: lis r5, 15258 ; CHECK-P8-LE-NEXT: and r3, r3, r4 ; CHECK-P8-LE-NEXT: ori r4, r5, 41712 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_align32_vector: @@ -567,7 +583,8 @@ ; CHECK-P8-LE-NEXT: oris r4, r4, 54437 ; CHECK-P8-LE-NEXT: ori r4, r4, 4097 ; CHECK-P8-LE-NEXT: or r3, r3, r4 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_not_disjoint64_vector: @@ -614,7 +631,8 @@ ; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24 ; CHECK-P8-LE-NEXT: oris r4, r4, 54437 ; CHECK-P8-LE-NEXT: ori r4, r4, 4097 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_unalign64_vector: @@ -659,7 +677,8 @@ ; 
CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23 ; CHECK-P8-LE-NEXT: ori r4, r4, 19025 ; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24 -; CHECK-P8-LE-NEXT: lvx v2, r3, r4 +; CHECK-P8-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_disjoint_align64_vector: @@ -689,7 +708,8 @@ ; CHECK-P8-LE-LABEL: ld_cst_unalign16_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r3, 255 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_unalign16_vector: @@ -712,7 +732,8 @@ ; CHECK-P8-LE-LABEL: ld_cst_align16_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r3, 4080 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_align16_vector: @@ -744,7 +765,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r3, 1 ; CHECK-P8-LE-NEXT: ori r3, r3, 34463 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_unalign32_vector: @@ -777,7 +799,8 @@ ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r3, 152 ; CHECK-P8-LE-NEXT: ori r3, r3, 38428 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_align32_vector: @@ -816,7 +839,8 @@ ; CHECK-P8-LE-NEXT: rldic r3, r3, 35, 24 ; CHECK-P8-LE-NEXT: oris r3, r3, 54437 ; CHECK-P8-LE-NEXT: ori r3, r3, 4097 -; CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_unalign64_vector: @@ -854,7 +878,8 @@ ; CHECK-P8-LE-NEXT: lis r3, 3725 ; CHECK-P8-LE-NEXT: ori r3, r3, 19025 ; CHECK-P8-LE-NEXT: rldic r3, r3, 12, 24 -; 
CHECK-P8-LE-NEXT: lvx v2, 0, r3 +; CHECK-P8-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd v2, vs0 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: ld_cst_align64_vector: @@ -878,7 +903,8 @@ ; ; CHECK-P8-LE-LABEL: st_0_vector: ; CHECK-P8-LE: # %bb.0: # %entry -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_0_vector: @@ -906,8 +932,9 @@ ; ; CHECK-P8-LE-LABEL: st_unalign16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: addi r3, r3, 1 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_unalign16_vector: @@ -937,8 +964,9 @@ ; ; CHECK-P8-LE-LABEL: st_align16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: addi r3, r3, 8 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_align16_vector: @@ -969,9 +997,10 @@ ; ; CHECK-P8-LE-LABEL: st_unalign32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r4, 1 ; CHECK-P8-LE-NEXT: ori r4, r4, 34463 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_unalign32_vector: @@ -1003,9 +1032,10 @@ ; ; CHECK-P8-LE-LABEL: st_align32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r4, 1525 ; CHECK-P8-LE-NEXT: ori r4, r4, 56600 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_align32_vector: @@ -1043,10 +1073,11 @@ ; CHECK-P8-LE-LABEL: st_unalign64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r4, 29 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24 ; CHECK-P8-LE-NEXT: oris r4, 
r4, 54437 ; CHECK-P8-LE-NEXT: ori r4, r4, 4097 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_unalign64_vector: @@ -1084,9 +1115,10 @@ ; CHECK-P8-LE-LABEL: st_align64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r4, 3725 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: ori r4, r4, 19025 ; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_align64_vector: @@ -1112,7 +1144,8 @@ ; ; CHECK-P8-LE-LABEL: st_reg_vector: ; CHECK-P8-LE: # %bb.0: # %entry -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_reg_vector: @@ -1136,8 +1169,9 @@ ; ; CHECK-P8-LE-LABEL: st_or1_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: or r3, r4, r3 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_or1_vector: @@ -1163,8 +1197,9 @@ ; ; CHECK-P8-LE-LABEL: st_or2_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_or2_vector: @@ -1191,8 +1226,9 @@ ; ; CHECK-P8-LE-LABEL: st_not_disjoint16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: ori r3, r3, 6 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_not_disjoint16_vector: @@ -1224,9 +1260,10 @@ ; ; CHECK-P8-LE-LABEL: st_disjoint_unalign16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 ; CHECK-P8-LE-NEXT: ori 
r3, r3, 6 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_unalign16_vector: @@ -1260,9 +1297,10 @@ ; ; CHECK-P8-LE-LABEL: st_disjoint_align16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 51 ; CHECK-P8-LE-NEXT: ori r3, r3, 24 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_align16_vector: @@ -1290,9 +1328,10 @@ ; ; CHECK-P8-LE-LABEL: st_not_disjoint32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: ori r3, r3, 34463 ; CHECK-P8-LE-NEXT: oris r3, r3, 1 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_not_disjoint32_vector: @@ -1326,10 +1365,11 @@ ; ; CHECK-P8-LE-LABEL: st_disjoint_unalign32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r4, 1 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 43 ; CHECK-P8-LE-NEXT: ori r4, r4, 34463 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_unalign32_vector: @@ -1367,11 +1407,12 @@ ; ; CHECK-P8-LE-LABEL: st_disjoint_align32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r4, -15264 ; CHECK-P8-LE-NEXT: lis r5, 15258 ; CHECK-P8-LE-NEXT: and r3, r3, r4 ; CHECK-P8-LE-NEXT: ori r4, r5, 41712 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_align32_vector: @@ -1414,11 +1455,12 @@ ; CHECK-P8-LE-LABEL: st_not_disjoint64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r4, 29 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24 ; CHECK-P8-LE-NEXT: oris r4, r4, 54437 
; CHECK-P8-LE-NEXT: ori r4, r4, 4097 ; CHECK-P8-LE-NEXT: or r3, r3, r4 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_not_disjoint64_vector: @@ -1461,11 +1503,12 @@ ; CHECK-P8-LE-LABEL: st_disjoint_unalign64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r4, 29 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23 ; CHECK-P8-LE-NEXT: rldic r4, r4, 35, 24 ; CHECK-P8-LE-NEXT: oris r4, r4, 54437 ; CHECK-P8-LE-NEXT: ori r4, r4, 4097 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_unalign64_vector: @@ -1507,10 +1550,11 @@ ; CHECK-P8-LE-LABEL: st_disjoint_align64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r4, 3725 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldicr r3, r3, 0, 23 ; CHECK-P8-LE-NEXT: ori r4, r4, 19025 ; CHECK-P8-LE-NEXT: rldic r4, r4, 12, 24 -; CHECK-P8-LE-NEXT: stvx v2, r3, r4 +; CHECK-P8-LE-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_disjoint_align64_vector: @@ -1539,8 +1583,9 @@ ; ; CHECK-P8-LE-LABEL: st_cst_unalign16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: li r3, 255 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_unalign16_vector: @@ -1562,8 +1607,9 @@ ; ; CHECK-P8-LE-LABEL: st_cst_align16_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: li r3, 4080 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_align16_vector: @@ -1593,9 +1639,10 @@ ; ; CHECK-P8-LE-LABEL: st_cst_unalign32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r3, 1 ; CHECK-P8-LE-NEXT: ori r3, r3, 
34463 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_unalign32_vector: @@ -1626,9 +1673,10 @@ ; ; CHECK-P8-LE-LABEL: st_cst_align32_vector: ; CHECK-P8-LE: # %bb.0: # %entry +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: lis r3, 152 ; CHECK-P8-LE-NEXT: ori r3, r3, 38428 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_align32_vector: @@ -1664,10 +1712,11 @@ ; CHECK-P8-LE-LABEL: st_cst_unalign64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: li r3, 29 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: rldic r3, r3, 35, 24 ; CHECK-P8-LE-NEXT: oris r3, r3, 54437 ; CHECK-P8-LE-NEXT: ori r3, r3, 4097 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_unalign64_vector: @@ -1703,9 +1752,10 @@ ; CHECK-P8-LE-LABEL: st_cst_align64_vector: ; CHECK-P8-LE: # %bb.0: # %entry ; CHECK-P8-LE-NEXT: lis r3, 3725 +; CHECK-P8-LE-NEXT: xxswapd vs0, v2 ; CHECK-P8-LE-NEXT: ori r3, r3, 19025 ; CHECK-P8-LE-NEXT: rldic r3, r3, 12, 24 -; CHECK-P8-LE-NEXT: stvx v2, 0, r3 +; CHECK-P8-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-P8-LE-NEXT: blr ; ; CHECK-P8-BE-LABEL: st_cst_align64_vector: diff --git a/llvm/test/CodeGen/PowerPC/vector-rotates.ll b/llvm/test/CodeGen/PowerPC/vector-rotates.ll --- a/llvm/test/CodeGen/PowerPC/vector-rotates.ll +++ b/llvm/test/CodeGen/PowerPC/vector-rotates.ll @@ -11,7 +11,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd vs35, vs0 ; CHECK-P8-NEXT: vrlb v2, v2, v3 ; CHECK-P8-NEXT: blr ; @@ -34,7 +35,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P8-NEXT: 
lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd vs35, vs0 ; CHECK-P8-NEXT: vrlh v2, v2, v3 ; CHECK-P8-NEXT: blr ; @@ -57,7 +59,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd vs35, vs0 ; CHECK-P8-NEXT: vrlw v2, v2, v3 ; CHECK-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vselect-constants.ll b/llvm/test/CodeGen/PowerPC/vselect-constants.ll --- a/llvm/test/CodeGen/PowerPC/vselect-constants.ll +++ b/llvm/test/CodeGen/PowerPC/vselect-constants.ll @@ -16,11 +16,13 @@ ; CHECK-NEXT: addis 4, 2, .LCPI0_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l ; CHECK-NEXT: addi 4, 4, .LCPI0_1@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: lxvd2x 1, 0, 4 ; CHECK-NEXT: vsubuwm 3, 4, 3 -; CHECK-NEXT: lvx 4, 0, 4 ; CHECK-NEXT: vslw 2, 2, 3 +; CHECK-NEXT: xxswapd 36, 1 ; CHECK-NEXT: vsraw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: xxsel 34, 36, 35, 34 ; CHECK-NEXT: blr %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -30,13 +32,15 @@ define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_C1_or_C2_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK-NEXT: addis 4, 2, .LCPI1_1@toc@ha +; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l ; CHECK-NEXT: addi 4, 4, .LCPI1_1@toc@l -; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: lvx 4, 0, 4 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: lxvd2x 1, 0, 4 +; CHECK-NEXT: xxswapd 35, 0 +; CHECK-NEXT: xxswapd 36, 1 ; CHECK-NEXT: xxsel 34, 36, 35, 34 ; CHECK-NEXT: blr %cond = icmp eq <4 x i32> %x, %y @@ -47,12 +51,13 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: vspltisw 3, 1 ; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; 
CHECK-NEXT: vspltisw 3, 1 ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: xxland 34, 34, 35 -; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: vadduwm 2, 2, 3 +; CHECK-NEXT: xxswapd 36, 0 +; CHECK-NEXT: vadduwm 2, 2, 4 ; CHECK-NEXT: blr %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> ret <4 x i32> %add @@ -61,10 +66,11 @@ define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addi 3, 3, .LCPI3_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vsubuwm 2, 3, 2 ; CHECK-NEXT: blr %cond = icmp eq <4 x i32> %x, %y @@ -79,10 +85,11 @@ ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l +; CHECK-NEXT: lxvd2x 0, 0, 3 ; CHECK-NEXT: vsubuwm 3, 4, 3 ; CHECK-NEXT: vslw 2, 2, 3 ; CHECK-NEXT: vsraw 2, 2, 3 -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -92,10 +99,11 @@ define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; CHECK-NEXT: vcmpequw 2, 2, 3 ; CHECK-NEXT: addi 3, 3, .LCPI5_0@toc@l -; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %cond = icmp eq <4 x i32> %x, %y diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll --- a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll @@ -14,10 +14,8 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t -; RUN: grep 
lxvd2x < %t | count 3 -; RUN: grep lvx < %t | count 3 -; RUN: grep stxvd2x < %t | count 3 -; RUN: grep stvx < %t | count 3 +; RUN: grep lxvd2x < %t | count 6 +; RUN: grep stxvd2x < %t | count 6 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1038,7 +1038,8 @@ ; ; CHECK-LE-LABEL: test32: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: lvx v2, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-NEXT: xxswapd v2, vs0 ; CHECK-LE-NEXT: blr %v = load <4 x float>, <4 x float>* %a, align 16 ret <4 x float> %v @@ -1065,7 +1066,8 @@ ; ; CHECK-LE-LABEL: test33: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: stvx v2, 0, r3 +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: blr store <4 x float> %b, <4 x float>* %a, align 16 ret void @@ -1159,7 +1161,8 @@ ; ; CHECK-LE-LABEL: test34: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: lvx v2, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-NEXT: xxswapd v2, vs0 ; CHECK-LE-NEXT: blr %v = load <4 x i32>, <4 x i32>* %a, align 16 ret <4 x i32> %v @@ -1186,7 +1189,8 @@ ; ; CHECK-LE-LABEL: test35: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: stvx v2, 0, r3 +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: stxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: blr store <4 x i32> %b, <4 x i32>* %a, align 16 ret void @@ -2282,9 +2286,10 @@ ; CHECK-LE: # %bb.0: ; CHECK-LE-NEXT: addis r3, r2, .LCPI63_0@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI63_0@toc@l -; CHECK-LE-NEXT: lvx v3, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: addis r3, r2, .LCPI63_1@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI63_1@toc@l +; CHECK-LE-NEXT: xxswapd v3, vs0 ; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: xxswapd v3, vs0 @@ -2362,9 +2367,10 @@ ; CHECK-LE: # %bb.0: ; CHECK-LE-NEXT: addis r3, r2, 
.LCPI64_0@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI64_0@toc@l -; CHECK-LE-NEXT: lvx v3, 0, r3 +; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: addis r3, r2, .LCPI64_1@toc@ha ; CHECK-LE-NEXT: addi r3, r3, .LCPI64_1@toc@l +; CHECK-LE-NEXT: xxswapd v3, vs0 ; CHECK-LE-NEXT: lxvd2x vs0, 0, r3 ; CHECK-LE-NEXT: vperm v2, v2, v2, v3 ; CHECK-LE-NEXT: xxswapd v3, vs0 @@ -2419,11 +2425,12 @@ ; ; CHECK-LE-LABEL: test80: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: mtfprwz f0, r3 ; CHECK-LE-NEXT: addis r4, r2, .LCPI65_0@toc@ha -; CHECK-LE-NEXT: addi r3, r4, .LCPI65_0@toc@l -; CHECK-LE-NEXT: xxspltw v2, vs0, 1 -; CHECK-LE-NEXT: lvx v3, 0, r3 +; CHECK-LE-NEXT: mtfprwz f1, r3 +; CHECK-LE-NEXT: addi r4, r4, .LCPI65_0@toc@l +; CHECK-LE-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-NEXT: xxspltw v2, vs1, 1 +; CHECK-LE-NEXT: xxswapd v3, vs0 ; CHECK-LE-NEXT: vadduwm v2, v2, v3 ; CHECK-LE-NEXT: blr %b1 = insertelement <2 x i32> undef, i32 %v, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll --- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -94,8 +94,7 @@ define double @teste0(<2 x double>* %p1) { ; CHECK-LABEL: teste0: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x vs1, 0, r3 -; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: lfd f1, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-P8-BE-LABEL: teste0: @@ -105,8 +104,7 @@ ; ; CHECK-P9-VECTOR-LABEL: teste0: ; CHECK-P9-VECTOR: # %bb.0: -; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3 -; CHECK-P9-VECTOR-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P9-VECTOR-NEXT: lfd f1, 0(r3) ; CHECK-P9-VECTOR-NEXT: blr ; ; CHECK-P9-LABEL: teste0: @@ -123,9 +121,7 @@ define double @teste1(<2 x double>* %p1) { ; CHECK-LABEL: teste1: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: xxswapd vs1, vs0 -; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: lfd f1, 8(r3) ; CHECK-NEXT: blr ; ; 
CHECK-P8-BE-LABEL: teste1: @@ -135,9 +131,7 @@ ; ; CHECK-P9-VECTOR-LABEL: teste1: ; CHECK-P9-VECTOR: # %bb.0: -; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0 -; CHECK-P9-VECTOR-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-P9-VECTOR-NEXT: lfd f1, 8(r3) ; CHECK-P9-VECTOR-NEXT: blr ; ; CHECK-P9-LABEL: teste1: diff --git a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll --- a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll @@ -12,8 +12,7 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test00: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxspltd 34, 0, 0 +; CHECK-NEXT: lxvdsx 34, 0, 3 ; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: test00: @@ -106,8 +105,8 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxspltd 34, 0, 1 +; CHECK-NEXT: addi 3, 3, 8 +; CHECK-NEXT: lxvdsx 34, 0, 3 ; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: test11: @@ -212,8 +211,7 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x 0, 0, 4 -; CHECK-NEXT: xxspltd 34, 0, 0 +; CHECK-NEXT: lxvdsx 34, 0, 4 ; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: test22: @@ -306,8 +304,8 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test33: ; CHECK: # %bb.0: -; CHECK-NEXT: lxvd2x 0, 0, 4 -; CHECK-NEXT: xxspltd 34, 0, 1 +; CHECK-NEXT: addi 3, 4, 8 +; CHECK-NEXT: lxvdsx 34, 0, 3 ; CHECK-NEXT: blr ; ; CHECK-P9-LABEL: test33: