diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10257,11 +10257,6 @@ bool isLittleEndian = Subtarget.isLittleEndian(); bool isPPC64 = Subtarget.isPPC64(); - // Only need to place items backwards in LE, - // the mask will be properly calculated. - if (isLittleEndian) - std::swap(V1, V2); - if (Subtarget.hasVSX() && Subtarget.hasP9Vector() && (V1->hasOneUse() || V2->hasOneUse())) { LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using " @@ -10271,7 +10266,8 @@ // The second input to XXPERM is also an output so if the second input has // multiple uses then copying is necessary, as a result we want the // single-use operand to be used as the second input to prevent copying. - if (!V2->hasOneUse() && V1->hasOneUse()) { + if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) || + (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) { std::swap(V1, V2); NeedSwap = !NeedSwap; } @@ -10310,27 +10306,24 @@ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; - if (Opcode == PPCISD::XXPERM) { - if (V1HasXXSWAPD) { - if (SrcElt < 8) - SrcElt += 8; - else if (SrcElt < 16) - SrcElt -= 8; - } - if (V2HasXXSWAPD) { - if (SrcElt > 23) - SrcElt -= 8; - else if (SrcElt > 15) - SrcElt += 8; - } - if (NeedSwap) { - if (SrcElt < 16) - SrcElt += 16; - else - SrcElt -= 16; - } + if (V1HasXXSWAPD) { + if (SrcElt < 8) + SrcElt += 8; + else if (SrcElt < 16) + SrcElt -= 8; + } + if (V2HasXXSWAPD) { + if (SrcElt > 23) + SrcElt -= 8; + else if (SrcElt > 15) + SrcElt += 8; + } + if (NeedSwap) { + if (SrcElt < 16) + SrcElt += 16; + else + SrcElt -= 16; } - for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back( @@ -10340,18 +10333,19 @@ DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); } - if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) { - if (V1HasXXSWAPD) { - dl = SDLoc(V1->getOperand(0)); - V1 = V1->getOperand(0)->getOperand(1); - } - if (V2HasXXSWAPD) { - dl = SDLoc(V2->getOperand(0)); - V2 = V2->getOperand(0)->getOperand(1); - } - if (isPPC64 && ValType != MVT::v2f64) + if (V1HasXXSWAPD) { + dl = SDLoc(V1->getOperand(0)); + V1 = V1->getOperand(0)->getOperand(1); + } + if (V2HasXXSWAPD) { + dl = SDLoc(V2->getOperand(0)); + V2 = V2->getOperand(0)->getOperand(1); + } + + if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM)) { + if (ValType != MVT::v2f64) V1 = DAG.getBitcast(MVT::v2f64, V1); - if (isPPC64 && V2.getValueType() != MVT::v2f64) + if (V2.getValueType() != MVT::v2f64) V2 = DAG.getBitcast(MVT::v2f64, V2); } @@ -10372,6 +10366,11 @@ if (Opcode == PPCISD::XXPERM) VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask); + // Only need to place items backwards in LE, + // the mask was properly calculated. + if (isLittleEndian) + std::swap(V1, V2); + SDValue VPERMNode = DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask); diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1058,15 +1058,14 @@ ; ; P8LE-LABEL: fromDiffMemVarDi: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: sldi r4, r4, 2 ; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l ; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r5 ; P8LE-NEXT: addi r3, r3, -12 -; P8LE-NEXT: lxvd2x vs1, 0, r4 -; P8LE-NEXT: lxvd2x vs0, 0, r3 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: @@ -1479,11 +1478,10 @@ ; P8LE-LABEL: fromDiffMemConsDConvftoi: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha -; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: lxvd2x v2, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l -; P8LE-NEXT: lxvd2x vs1, 0, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v3, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspsxws v2, v2 ; P8LE-NEXT: blr @@ -2580,15 +2578,14 @@ ; ; P8LE-LABEL: fromDiffMemVarDui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: sldi r4, r4, 2 ; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha +; P8LE-NEXT: sldi r4, r4, 2 +; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l ; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l +; P8LE-NEXT: lxvd2x vs0, 0, r5 ; P8LE-NEXT: addi r3, r3, -12 -; P8LE-NEXT: lxvd2x vs1, 0, r4 -; P8LE-NEXT: lxvd2x vs0, 0, r3 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: @@ -3001,11 +2998,10 @@ ; P8LE-LABEL: fromDiffMemConsDConvftoui: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha -; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: lxvd2x v2, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l -; P8LE-NEXT: lxvd2x vs1, 0, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: lxvd2x vs0, 0, r4 +; P8LE-NEXT: xxswapd v3, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspuxws v2, v2 ; P8LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -491,10 +491,10 @@ ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: lxsd v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha -; CHECK-P9-BE-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l -; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) -; CHECK-P9-BE-NEXT: xxperm v2, vs1, vs0 +; CHECK-P9-BE-NEXT: lxv vs1, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1 ; CHECK-P9-BE-NEXT: blr ; ; CHECK-NOVSX-LABEL: testmrglb3: diff --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll --- a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll @@ -66,11 +66,11 @@ ; BE-LABEL: shufflevector_combine: ; BE: # %bb.0: # %newFuncRoot ; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; BE-NEXT: xxlxor vs1, vs1, vs1 +; BE-NEXT: xxlxor vs0, vs0, vs0 ; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; BE-NEXT: lxv vs0, 0(r3) +; BE-NEXT: lxv vs1, 0(r3) ; BE-NEXT: li r3, 0 -; BE-NEXT: xxperm v2, vs1, vs0 +; BE-NEXT: xxperm v2, vs0, vs1 ; BE-NEXT: vinsw v2, r3, 8 ; BE-NEXT: vpkuwum v2, v2, v2 ; BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -44,15 +44,15 @@ ; P9BE-NEXT: add 5, 3, 4 ; P9BE-NEXT: lxsdx 2, 3, 4 ; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; P9BE-NEXT: xxlxor 1, 1, 1 +; P9BE-NEXT: xxlxor 0, 0, 0 ; P9BE-NEXT: vspltisw 4, 8 ; P9BE-NEXT: lxsd 3, 4(5) ; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l ; P9BE-NEXT: vadduwm 4, 4, 4 -; P9BE-NEXT: lxv 0, 0(3) +; P9BE-NEXT: lxv 1, 0(3) ; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; P9BE-NEXT: xxperm 2, 1, 0 +; P9BE-NEXT: xxperm 2, 0, 1 ; P9BE-NEXT: lxv 0, 0(3) ; P9BE-NEXT: xxperm 3, 3, 0 ; P9BE-NEXT: vnegw 3, 3 @@ -280,10 +280,10 @@ ; P9BE-NEXT: xxperm 2, 3, 0 ; P9BE-NEXT: xxperm 1, 3, 0 ; P9BE-NEXT: vsplth 3, 3, 3 -; P9BE-NEXT: lxv 0, 0(3) +; P9BE-NEXT: xxmrghw 0, 3, 1 +; P9BE-NEXT: lxv 1, 0(3) ; P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxmrghw 3, 3, 1 -; P9BE-NEXT: xxperm 2, 3, 0 +; P9BE-NEXT: xxperm 2, 0, 1 ; P9BE-NEXT: xxspltw 3, 2, 1 ; P9BE-NEXT: vadduwm 2, 2, 3 ; P9BE-NEXT: vextuwlx 3, 3, 2 @@ -306,10 +306,10 @@ ; P9BE-AIX-NEXT: xxperm 2, 3, 0 ; P9BE-AIX-NEXT: xxperm 1, 3, 0 ; P9BE-AIX-NEXT: vsplth 3, 3, 3 -; P9BE-AIX-NEXT: lxv 0, 0(3) +; P9BE-AIX-NEXT: xxmrghw 0, 3, 1 +; P9BE-AIX-NEXT: lxv 1, 0(3) ; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxmrghw 3, 3, 1 -; P9BE-AIX-NEXT: xxperm 2, 3, 0 +; P9BE-AIX-NEXT: xxperm 2, 0, 1 ; P9BE-AIX-NEXT: xxspltw 3, 2, 1 ; P9BE-AIX-NEXT: vadduwm 2, 2, 3 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 @@ -389,13 +389,13 @@ ; P9LE-NEXT: vmrghb 2, 3, 2 ; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l ; P9LE-NEXT: vmrglh 2, 2, 4 -; P9LE-NEXT: lxv 0, 0(3) +; P9LE-NEXT: lxv 1, 0(3) ; P9LE-NEXT: li 3, 0 ; P9LE-NEXT: vmrghb 3, 3, 5 ; P9LE-NEXT: xxmrglw 2, 2, 4 ; P9LE-NEXT: vmrglh 3, 3, 4 -; P9LE-NEXT: xxmrglw 3, 4, 3 -; P9LE-NEXT: xxperm 2, 3, 0 +; P9LE-NEXT: xxmrglw 0, 4, 3 +; P9LE-NEXT: xxperm 2, 0, 1 ; P9LE-NEXT: xxspltw 3, 2, 2 ; P9LE-NEXT: vadduwm 2, 2, 3 ; P9LE-NEXT: vextuwrx 3, 3, 2 diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -184,12 +184,11 @@ ; CHECK-LE-P8-LABEL: test_none_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4 ; CHECK-LE-P8-NEXT: mtvsrd v4, r3 ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P8-NEXT: blr ; @@ -432,12 +431,11 @@ ; CHECK-LE-P8-LABEL: test_none_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4 ; CHECK-LE-P8-NEXT: mtvsrd v4, r3 ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -389,10 +389,10 @@ ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1 ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -411,10 +411,10 @@ ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1 ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; @@ -470,15 +470,14 @@ ; CHECK-LE-P8-LABEL: test_none_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxvd2x v3, 0, r4 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v4, vs1 ; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 @@ -545,10 +544,10 @@ ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 -; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) -; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0 ; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr @@ -604,10 +603,10 @@ ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfd f0, 0(r3) ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1 ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -626,10 +625,10 @@ ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lfd f0, 0(r3) ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1 ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll --- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -203,13 +203,13 @@ ; ; CHECK-P9-LABEL: test2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 -; CHECK-P9-NEXT: xvcvuxddp vs0, vs0 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp vs0, vs1 ; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -80,13 +80,13 @@ ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f0, r3 +; CHECK-BE-NEXT: mtfprd f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 -; CHECK-BE-NEXT: xvcvuxwsp v2, vs0 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <4 x i16> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -24,24 +24,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 -; CHECK-P9-NEXT: xvcvuxddp v2, vs0 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 -; CHECK-BE-NEXT: xvcvuxddp v2, vs0 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxddp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <2 x i16> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -78,24 +78,24 @@ ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 -; CHECK-P9-NEXT: xvcvuxwsp v2, vs0 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxwsp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 -; CHECK-BE-NEXT: xvcvuxwsp v2, vs0 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <4 x i8> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -24,24 +24,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 -; CHECK-P9-NEXT: xvcvuxddp v2, vs0 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs1, 0(r3) -; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 -; CHECK-BE-NEXT: xvcvuxddp v2, vs0 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxddp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i16 %a.coerce to <2 x i8> diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -92,8 +92,8 @@ ; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32( @@ -368,9 +368,9 @@ ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI7_0@toc@l ; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -689,8 +689,8 @@ ; PC64LE9-NEXT: xsmulsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( @@ -851,8 +851,8 @@ ; PC64LE9-NEXT: xsaddsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( @@ -1013,8 +1013,8 @@ ; PC64LE9-NEXT: xssubsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( @@ -1160,9 +1160,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32( @@ -1428,9 +1428,9 @@ ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI32_0@toc@l ; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1856,9 +1856,9 @@ ; PC64LE9-NEXT: ld 30, 48(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI37_0@toc@l ; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2236,9 +2236,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI42_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2587,9 +2587,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI47_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2938,9 +2938,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI52_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3289,9 +3289,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI57_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3640,9 +3640,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI62_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3991,9 +3991,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI67_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4342,9 +4342,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI72_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4600,9 +4600,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( @@ -4832,9 +4832,9 @@ ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI82_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5158,9 +5158,9 @@ ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI87_0@toc@l ; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5400,9 +5400,9 @@ ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI92_0@toc@l ; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5587,22 +5587,22 @@ ; PC64LE9-NEXT: xxswapd 1, 34 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvspdpn 1, 1 +; PC64LE9-NEXT: xxsldwi 2, 34, 34, 1 +; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvdpsxws 0, 0 ; PC64LE9-NEXT: xscvdpsxws 1, 1 +; PC64LE9-NEXT: xscvdpsxws 2, 2 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xscvdpsxws 1, 1 -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: mffprwz 3, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32( @@ -5850,18 +5850,18 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xscvdpsxws 0, 1 ; PC64LE9-NEXT: xscvdpsxws 1, 2 +; PC64LE9-NEXT: xscvdpsxws 2, 3 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xscvdpsxws 1, 3 ; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: mffprwz 3, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( @@ -6083,22 +6083,22 @@ ; PC64LE9-NEXT: xxswapd 1, 34 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvspdpn 1, 1 +; PC64LE9-NEXT: xxsldwi 2, 34, 34, 1 +; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvdpuxws 0, 0 ; PC64LE9-NEXT: xscvdpuxws 1, 1 +; PC64LE9-NEXT: xscvdpuxws 2, 2 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xscvdpuxws 1, 1 -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: mffprwz 3, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( @@ -6345,18 +6345,18 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xscvdpuxws 0, 1 ; PC64LE9-NEXT: xscvdpuxws 1, 2 +; PC64LE9-NEXT: xscvdpuxws 2, 3 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xscvdpuxws 1, 3 ; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: mffprwz 3, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( @@ -6557,15 +6557,15 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xsrsp 0, 1 ; PC64LE9-NEXT: xsrsp 1, 2 +; PC64LE9-NEXT: xsrsp 2, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI129_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: xsrsp 1, 3 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( @@ -6775,9 +6775,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( @@ -6891,9 +6891,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( @@ -7006,9 +7006,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( @@ -7122,9 +7122,9 @@ ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: xxmrghw 35, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 35, 1 +; PC64LE9-NEXT: xxmrghw 1, 1, 2 +; PC64LE9-NEXT: lxv 2, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 2 ; PC64LE9-NEXT: blr entry: %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( @@ -7479,13 +7479,13 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwa 1, 3 -; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: mtfprwa 2, 3 +; PC64LE9-NEXT: xscvsxdsp 2, 2 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7547,18 +7547,18 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mtfprd 0, 3 ; PC64LE9-NEXT: mtfprd 1, 4 +; PC64LE9-NEXT: mtfprd 2, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha ; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: xscvsxdsp 1, 1 +; PC64LE9-NEXT: xscvsxdsp 2, 2 ; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: mtfprd 1, 5 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7829,10 +7829,10 @@ ; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI173_0@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 +; PC64LE9-NEXT: xxlxor 0, 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI173_0@toc@l -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: xvcvuxddp 34, 34 ; PC64LE9-NEXT: blr entry: @@ -8045,13 +8045,13 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: mtfprwz 2, 3 +; PC64LE9-NEXT: xscvuxdsp 2, 2 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -8113,18 +8113,18 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mtfprd 0, 3 ; PC64LE9-NEXT: mtfprd 1, 4 +; PC64LE9-NEXT: mtfprd 2, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI181_0@toc@ha ; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: xscvuxdsp 1, 1 +; PC64LE9-NEXT: xscvuxdsp 2, 2 ; PC64LE9-NEXT: addi 3, 3, .LCPI181_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 35, 1, 0 -; PC64LE9-NEXT: mtfprd 1, 5 -; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 34, 1 -; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: xscvdpspn 34, 2 +; PC64LE9-NEXT: xxmrghw 0, 1, 0 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> diff --git a/llvm/test/CodeGen/PowerPC/vperm-swap.ll b/llvm/test/CodeGen/PowerPC/vperm-swap.ll --- a/llvm/test/CodeGen/PowerPC/vperm-swap.ll +++ b/llvm/test/CodeGen/PowerPC/vperm-swap.ll @@ -4,31 +4,30 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) { ; CHECK-LE-P8: .LCPI0_0: -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 30 # 0x1e +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 22 # 0x16 ; CHECK-LE-P8-NEXT: .byte 7 # 0x7 -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f -; CHECK-LE-P8-NEXT: .byte 31 # 0x1f +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 +; CHECK-LE-P8-NEXT: .byte 23 # 0x17 ; CHECK-LE-P8-LABEL: test_none_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4 ; CHECK-LE-P8-NEXT: mtvsrd v4, r3 ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v2, vs0 -; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P8-NEXT: blr entry: