diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10186,9 +10186,6 @@ } } - bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; - bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; - // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. @@ -10199,6 +10196,13 @@ EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits() / 8; + bool V1HasXXSWAPD = Opcode == PPCISD::XXPERM + ? V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD + : false; + bool V2HasXXSWAPD = Opcode == PPCISD::XXPERM + ? V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD + : false; + /* Vectors will be appended like so: [ V1 | v2 ] XXSWAPD on V1: @@ -10219,24 +10223,27 @@ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; - if (V1HasXXSWAPD) { - if (SrcElt < 8) - SrcElt += 8; - else if (SrcElt < 16) - SrcElt -= 8; - } - if (V2HasXXSWAPD) { - if (SrcElt > 23) - SrcElt -= 8; - else if (SrcElt > 15) - SrcElt += 8; - } - if (NeedSwap) { - if (SrcElt < 16) - SrcElt += 16; - else - SrcElt -= 16; + if (Opcode == PPCISD::XXPERM) { + if (V1HasXXSWAPD) { + if (SrcElt < 8) + SrcElt += 8; + else if (SrcElt < 16) + SrcElt -= 8; + } + if (V2HasXXSWAPD) { + if (SrcElt > 23) + SrcElt -= 8; + else if (SrcElt > 15) + SrcElt += 8; + } + if (NeedSwap) { + if (SrcElt < 16) + SrcElt += 16; + else + SrcElt -= 16; + } } + for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back( @@ -10246,20 +10253,21 @@ DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); } - if (V1HasXXSWAPD) { - dl = SDLoc(V1->getOperand(0)); - V1 = V1->getOperand(0)->getOperand(1); - } - if (V2HasXXSWAPD) { - dl = SDLoc(V2->getOperand(0)); - V2 = V2->getOperand(0)->getOperand(1); - } - - if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) { - if (isPPC64 && ValType != MVT::v2f64) - V1 = DAG.getBitcast(MVT::v2f64, V1); - if (isPPC64 && V2.getValueType() != MVT::v2f64) - V2 = DAG.getBitcast(MVT::v2f64, V2); + if (Opcode == PPCISD::XXPERM) { + if (V1HasXXSWAPD) { + dl = SDLoc(V1->getOperand(0)); + V1 = V1->getOperand(0)->getOperand(1); + } + if (V2HasXXSWAPD) { + dl = SDLoc(V2->getOperand(0)); + V2 = V2->getOperand(0)->getOperand(1); + } + if (V1HasXXSWAPD || V2HasXXSWAPD) { + if (isPPC64 && ValType != MVT::v2f64) + V1 = DAG.getBitcast(MVT::v2f64, V1); + if (isPPC64 && V2.getValueType() != MVT::v2f64) + V2 = DAG.getBitcast(MVT::v2f64, V2); + } } ShufflesHandledWithVPERM++; diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1058,14 +1058,15 @@ ; ; P8LE-LABEL: fromDiffMemVarDi: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha ; P8LE-NEXT: sldi r4, r4, 2 -; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l +; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha ; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: lxvd2x vs0, 0, r5 +; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l ; P8LE-NEXT: addi r3, r3, -12 -; P8LE-NEXT: lxvd2x v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: @@ -1478,10 +1479,11 @@ ; P8LE-LABEL: fromDiffMemConsDConvftoi: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha -; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l -; P8LE-NEXT: lxvd2x vs0, 0, r4 -; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspsxws v2, v2 ; P8LE-NEXT: blr @@ -2578,14 +2580,15 @@ ; ; P8LE-LABEL: fromDiffMemVarDui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha ; P8LE-NEXT: sldi r4, r4, 2 -; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l +; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha ; P8LE-NEXT: add r3, r3, r4 -; P8LE-NEXT: lxvd2x vs0, 0, r5 +; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l ; P8LE-NEXT: addi r3, r3, -12 -; P8LE-NEXT: lxvd2x v2, 0, r3 -; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: blr entry: @@ -2998,10 +3001,11 @@ ; P8LE-LABEL: fromDiffMemConsDConvftoui: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha -; P8LE-NEXT: lxvd2x v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 ; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l -; P8LE-NEXT: lxvd2x vs0, 0, r4 -; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: lxvd2x vs1, 0, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: vperm v2, v2, v2, v3 ; P8LE-NEXT: xvcvspuxws v2, v2 ; P8LE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -491,10 +491,10 @@ ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: lxsd v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha -; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-BE-NEXT: xxlxor vs1, vs1, vs1 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l -; CHECK-P9-BE-NEXT: lxv vs1, 0(r3) -; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, vs1, vs0 ; CHECK-P9-BE-NEXT: blr ; ; CHECK-NOVSX-LABEL: testmrglb3: diff --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll --- a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll @@ -66,11 +66,11 @@ ; BE-LABEL: shufflevector_combine: ; BE: # %bb.0: # %newFuncRoot ; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; BE-NEXT: xxlxor vs0, vs0, vs0 +; BE-NEXT: xxlxor vs1, vs1, vs1 ; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; BE-NEXT: lxv vs1, 0(r3) +; BE-NEXT: lxv vs0, 0(r3) ; BE-NEXT: li r3, 0 -; BE-NEXT: xxperm v2, vs0, vs1 +; BE-NEXT: xxperm v2, vs1, vs0 ; BE-NEXT: vinsw v2, r3, 8 ; BE-NEXT: vpkuwum v2, v2, v2 ; BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -44,15 +44,15 @@ ; P9BE-NEXT: add 5, 3, 4 ; P9BE-NEXT: lxsdx 2, 3, 4 ; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; P9BE-NEXT: xxlxor 0, 0, 0 +; P9BE-NEXT: xxlxor 1, 1, 1 ; P9BE-NEXT: vspltisw 4, 8 ; P9BE-NEXT: lxsd 3, 4(5) ; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l ; P9BE-NEXT: vadduwm 4, 4, 4 -; P9BE-NEXT: lxv 1, 0(3) +; P9BE-NEXT: lxv 0, 0(3) ; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; P9BE-NEXT: xxperm 2, 0, 1 +; P9BE-NEXT: xxperm 2, 1, 0 ; P9BE-NEXT: lxv 0, 0(3) ; P9BE-NEXT: xxperm 3, 3, 0 ; P9BE-NEXT: vnegw 3, 3 @@ -285,10 +285,10 @@ ; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha ; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l ; P9BE-NEXT: xxperm 2, 0, 1 -; P9BE-NEXT: lxv 1, 0(3) +; P9BE-NEXT: lxv 0, 0(3) ; P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxmrghw 0, 4, 2 -; P9BE-NEXT: xxperm 3, 0, 1 +; P9BE-NEXT: xxmrghw 2, 4, 2 +; P9BE-NEXT: xxperm 3, 2, 0 ; P9BE-NEXT: xxspltw 2, 3, 1 ; P9BE-NEXT: vadduwm 2, 3, 2 ; P9BE-NEXT: vextuwlx 3, 3, 2 @@ -312,10 +312,10 @@ ; P9BE-AIX-NEXT: lxsihzx 0, 3, 4 ; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1 ; P9BE-AIX-NEXT: xxperm 2, 0, 1 -; P9BE-AIX-NEXT: lxv 1, 0(3) +; P9BE-AIX-NEXT: lxv 0, 0(3) ; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxmrghw 0, 4, 2 -; P9BE-AIX-NEXT: xxperm 3, 0, 1 +; P9BE-AIX-NEXT: xxmrghw 2, 4, 2 +; P9BE-AIX-NEXT: xxperm 3, 2, 0 ; P9BE-AIX-NEXT: xxspltw 2, 3, 1 ; P9BE-AIX-NEXT: vadduwm 2, 3, 2 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 @@ -395,13 +395,13 @@ ; P9LE-NEXT: vmrghb 2, 3, 2 ; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l ; P9LE-NEXT: vmrglh 2, 2, 4 -; P9LE-NEXT: lxv 1, 0(3) +; P9LE-NEXT: lxv 0, 0(3) ; P9LE-NEXT: li 3, 0 ; P9LE-NEXT: vmrghb 3, 3, 5 ; P9LE-NEXT: xxmrglw 2, 2, 4 ; P9LE-NEXT: vmrglh 3, 3, 4 -; P9LE-NEXT: xxmrglw 0, 4, 3 -; P9LE-NEXT: xxperm 2, 0, 1 +; P9LE-NEXT: xxmrglw 3, 4, 3 +; P9LE-NEXT: xxperm 2, 3, 0 ; P9LE-NEXT: xxspltw 3, 2, 2 ; P9LE-NEXT: vadduwm 2, 2, 3 ; P9LE-NEXT: vextuwrx 3, 3, 2 diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -184,11 +184,12 @@ ; CHECK-LE-P8-LABEL: test_none_v16i8: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: mtvsrd v4, r3 ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 ; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P8-NEXT: blr ; @@ -431,11 +432,12 @@ ; CHECK-LE-P8-LABEL: test_none_v8i16: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: mtvsrd v4, r3 ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 ; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 ; CHECK-LE-P8-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -389,10 +389,10 @@ ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) -; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2 +; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -411,10 +411,10 @@ ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) -; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2 +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; @@ -470,14 +470,15 @@ ; CHECK-LE-P8-LABEL: test_none_v2i64: ; CHECK-LE-P8: # %bb.0: # %entry ; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 -; CHECK-LE-P8-NEXT: lxvd2x v3, 0, r4 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 -; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs1 ; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 ; CHECK-LE-P8-NEXT: xxswapd v3, vs0 ; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 @@ -544,10 +545,10 @@ ; CHECK-AIX-64-P9: # %bb.0: # %entry ; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 -; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3) -; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr @@ -603,10 +604,10 @@ ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: lfd f0, 0(r3) ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs2, 0(r3) -; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2 +; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1 ; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-LE-P9-NEXT: blr ; @@ -625,10 +626,10 @@ ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: lfd f0, 0(r3) ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs2, 0(r3) -; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2 +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1 ; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) ; CHECK-BE-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll --- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -203,13 +203,13 @@ ; ; CHECK-P9-LABEL: test2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 -; CHECK-P9-NEXT: xvcvuxddp vs0, vs1 +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-P9-NEXT: xvcvuxddp vs0, vs0 ; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -80,13 +80,13 @@ ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprd f1, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 -; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <4 x i16> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -24,24 +24,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f1, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv vs2, 0(r3) -; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 -; CHECK-P9-NEXT: xvcvuxddp v2, vs1 +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-P9-NEXT: xvcvuxddp v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 -; CHECK-BE-NEXT: xvcvuxddp v2, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-NEXT: xvcvuxddp v2, vs0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <2 x i16> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -76,24 +76,24 @@ ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f1, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P9-NEXT: lxv vs2, 0(r3) -; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 -; CHECK-P9-NEXT: xvcvuxwsp v2, vs1 +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-P9-NEXT: xvcvuxwsp v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 -; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <4 x i8> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -24,24 +24,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f1, r3 +; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv vs2, 0(r3) -; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 -; CHECK-P9-NEXT: xvcvuxddp v2, vs1 +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxperm vs0, vs2, vs1 +; CHECK-P9-NEXT: xvcvuxddp v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv vs2, 0(r3) -; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 -; CHECK-BE-NEXT: xvcvuxddp v2, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r3) +; CHECK-BE-NEXT: xxperm vs0, vs2, vs1 +; CHECK-BE-NEXT: xvcvuxddp v2, vs0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i16 %a.coerce to <2 x i8> diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -7833,10 +7833,10 @@ ; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI173_0@toc@ha -; PC64LE9-NEXT: xxlxor 0, 0, 0 +; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI173_0@toc@l -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: xvcvuxddp 34, 34 ; PC64LE9-NEXT: blr entry: