diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2809,6 +2809,10 @@ def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>; +def : Pat<(v4i32 (PPCSToV DblToInt.A)), + (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>; +def : Pat<(v4i32 (PPCSToV DblToUInt.A)), + (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>; defm : ScalToVecWPermute< v4i32, FltToIntLoad.A, (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1), @@ -4138,12 +4142,52 @@ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 0))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 0))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 4))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 4))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 8))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)), + (v4i32 (XXINSERTW v4i32:$A, + 
(SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 8))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 12))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 12))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), @@ -4382,12 +4426,52 @@ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 12))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 12))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 8))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 8))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 4))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 4))>; def : Pat<(v4i32 
(insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPSXWS f64:$B), sub_64), + 0))>; +def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)), + (v4i32 (XXINSERTW v4i32:$A, + (SUBREG_TO_REG (i64 1), + (XSCVDPUXWS f64:$B), sub_64), + 0))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), diff --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll --- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll +++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll @@ -17,8 +17,8 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-BE-P9 -; xscvdpsxws and uxws is only available on Power7 and above -; Codgen is different for LE Power7 and Power8 +; xscvdpsxws and xscvdpuxws are only available on Power7 and above +; Codegen is different for Power7, Power8, and Power9.
define dso_local <4 x i32> @test(<4 x i32> %a, double %b) { ; CHECK-LE-P7-LABEL: test: @@ -38,20 +38,16 @@ ; ; CHECK-LE-P8-LABEL: test: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpsxws f0, f1 +; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lvx v3, 0, r3 -; CHECK-LE-P8-NEXT: mffprwz r4, f0 -; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 -; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1 -; CHECK-LE-P9-NEXT: mffprwz r3, f0 -; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-LE-P9-NEXT: blr ; @@ -70,9 +66,7 @@ ; ; CHECK-BE-P8-LABEL: test: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1 -; CHECK-BE-P8-NEXT: mffprwz r3, f0 -; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3 ; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3 ; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1 @@ -81,8 +75,6 @@ ; CHECK-BE-P9-LABEL: test: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1 -; CHECK-BE-P9-NEXT: mffprwz r3, f0 -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 ; CHECK-BE-P9-NEXT: blr entry: @@ -109,20 +101,16 @@ ; ; CHECK-LE-P8-LABEL: test2: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpsxws f0, f1 +; CHECK-LE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-LE-P8-NEXT: lvx v3, 0, r3 -; CHECK-LE-P8-NEXT: mffprwz r4, f0 -; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 -; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test2: ; 
CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: xscvdpsxws f0, f1 -; CHECK-LE-P9-NEXT: mffprwz r3, f0 -; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-LE-P9-NEXT: blr ; @@ -141,9 +129,7 @@ ; ; CHECK-BE-P8-LABEL: test2: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1 -; CHECK-BE-P8-NEXT: mffprwz r3, f0 -; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1 ; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3 ; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3 ; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1 @@ -152,8 +138,6 @@ ; CHECK-BE-P9-LABEL: test2: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: xscvdpsxws f0, f1 -; CHECK-BE-P9-NEXT: mffprwz r3, f0 -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 ; CHECK-BE-P9-NEXT: blr entry: @@ -180,20 +164,16 @@ ; ; CHECK-LE-P8-LABEL: test3: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpuxws f0, f1 +; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-LE-P8-NEXT: lvx v3, 0, r3 -; CHECK-LE-P8-NEXT: mffprwz r4, f0 -; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 -; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test3: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1 -; CHECK-LE-P9-NEXT: mffprwz r3, f0 -; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-LE-P9-NEXT: blr ; @@ -212,9 +192,7 @@ ; ; CHECK-BE-P8-LABEL: test3: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1 -; CHECK-BE-P8-NEXT: mffprwz r3, f0 -; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3 ; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3 ; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1 @@ -223,8 +201,6 @@ ; CHECK-BE-P9-LABEL: test3: ; 
CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1 -; CHECK-BE-P9-NEXT: mffprwz r3, f0 -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 ; CHECK-BE-P9-NEXT: blr entry: @@ -251,20 +227,16 @@ ; ; CHECK-LE-P8-LABEL: test4: ; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: xscvdpuxws f0, f1 +; CHECK-LE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-LE-P8-NEXT: lvx v3, 0, r3 -; CHECK-LE-P8-NEXT: mffprwz r4, f0 -; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 -; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: lvx v4, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 ; CHECK-LE-P8-NEXT: blr ; ; CHECK-LE-P9-LABEL: test4: ; CHECK-LE-P9: # %bb.0: # %entry ; CHECK-LE-P9-NEXT: xscvdpuxws f0, f1 -; CHECK-LE-P9-NEXT: mffprwz r3, f0 -; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-LE-P9-NEXT: blr ; @@ -283,9 +255,7 @@ ; ; CHECK-BE-P8-LABEL: test4: ; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1 -; CHECK-BE-P8-NEXT: mffprwz r3, f0 -; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1 ; CHECK-BE-P8-NEXT: vmrghw v3, v2, v3 ; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3 ; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1 @@ -294,8 +264,6 @@ ; CHECK-BE-P9-LABEL: test4: ; CHECK-BE-P9: # %bb.0: # %entry ; CHECK-BE-P9-NEXT: xscvdpuxws f0, f1 -; CHECK-BE-P9-NEXT: mffprwz r3, f0 -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 ; CHECK-BE-P9-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -13,12 +13,8 @@ ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: xscvdpuxws f1, v2 -; CHECK-P8-NEXT: 
xscvdpuxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrwz v2, r3 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: xscvdpuxws v2, v2 +; CHECK-P8-NEXT: xscvdpuxws v3, f0 ; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -26,26 +22,18 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpuxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrwz v3, r3 -; CHECK-P9-NEXT: xscvdpuxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: xscvdpuxws v3, v2 +; CHECK-P9-NEXT: xscvdpuxws v2, f0 ; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpuxws f0, v2 -; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvdpuxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvdpuxws v3, v2 +; CHECK-BE-NEXT: xscvdpuxws v2, f0 ; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr @@ -305,12 +293,8 @@ ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: xscvdpsxws f1, v2 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mffprwz r3, f1 -; CHECK-P8-NEXT: mtvsrwz v2, r3 -; CHECK-P8-NEXT: mffprwz r4, f0 -; CHECK-P8-NEXT: mtvsrwz v3, r4 +; CHECK-P8-NEXT: xscvdpsxws v2, v2 +; CHECK-P8-NEXT: xscvdpsxws v3, f0 ; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mffprd r3, f0 @@ -318,26 +302,18 @@ ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrwz v3, r3 -; CHECK-P9-NEXT: 
xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: xscvdpsxws v3, v2 +; CHECK-P9-NEXT: xscvdpsxws v2, f0 ; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvdpsxws v3, v2 +; CHECK-BE-NEXT: xscvdpsxws v2, f0 ; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr