diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1341,6 +1341,23 @@ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2)); } +// Signed (S) and unsigned (U) i32 -> f64 conversions of single elements of a +// v4i32, and the even (02) / odd (13) element pairs built from them. +def WToDPExtractConv { + dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0)))); + dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1)))); + dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2)))); + dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3)))); + dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0)))); + dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1)))); + dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2)))); + dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3)))); + dag BV02S = (v2f64 (build_vector El0S, El2S)); + dag BV13S = (v2f64 (build_vector El1S, El3S)); + dag BV02U = (v2f64 (build_vector El0U, El2U)); + dag BV13U = (v2f64 (build_vector El1U, El3U)); +} + // The following VSX instructions were introduced in Power ISA 2.07 /* FIXME: if the operands are v2i64, these patterns will not match. 
we should define new patterns or otherwise match the same patterns @@ -4159,6 +4176,43 @@ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, ExtDbl.B0U, ExtDbl.B1U)), (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; + // XVCVSPDP and XVCV[SU]XWDP convert words 0 and 2 (big endian numbering) of + // their input, so odd words are rotated left by one word (XXSLDWI 1) first. + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 1))))), + (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 0))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), + (XVCVSPDP (XXMRGHW $A, $A)), 2))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XVCVSPDP $A))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), + (XVCVSPDP (XXMRGLW $A, $A)), 2))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$B, 0))))), + (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$B, 3))))), + (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3), + (XXPERMDI $A, $B, 3), 1)))>; + def : Pat<WToDPExtractConv.BV02S, + (v2f64 (XVCVSXWDP $A))>; + def : Pat<WToDPExtractConv.BV13S, + (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>; + def : Pat<WToDPExtractConv.BV02U, + (v2f64 (XVCVUXWDP $A))>; + def : Pat<WToDPExtractConv.BV13U, + (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>; } let Predicates = [IsLittleEndian, HasP8Vector] in { @@ -4237,6 +4291,43 @@ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, ExtDbl.B0U, ExtDbl.B1U)), (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; + // Little endian element i is big endian word 3-i; XVCVSPDP and XVCV[SU]XWDP + // still convert big endian words 0 and 2 (little endian elements 3 and 1). + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 
(fpextend (extractelt v4f32:$A, 1))))), + (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 0))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), + (XVCVSPDP (XXMRGLW $A, $A)), 2))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP $A))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), + (XVCVSPDP (XXMRGHW $A, $A)), 2))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$B, 0))))), + (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3), + (XXPERMDI $B, $A, 3), 1)))>; + def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$B, 3))))), + (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>; + def : Pat<WToDPExtractConv.BV02S, + (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>; + def : Pat<WToDPExtractConv.BV13S, + (v2f64 (XVCVSXWDP $A))>; + def : Pat<WToDPExtractConv.BV02U, + (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>; + def : Pat<WToDPExtractConv.BV13U, + (v2f64 (XVCVUXWDP $A))>; } let Predicates = [HasDirectMove] in { diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -6123,3 +6123,412 @@ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat } + +; Some additional patterns that come up in real code. 
+define dso_local <2 x double> @sint_to_fp_widen02(<4 x i32> %a) { +; P9BE-LABEL: sint_to_fp_widen02: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xvcvsxwdp v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: sint_to_fp_widen02: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9LE-NEXT: xvcvsxwdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: sint_to_fp_widen02: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xvcvsxwdp v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: sint_to_fp_widen02: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8LE-NEXT: xvcvsxwdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = sitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @sint_to_fp_widen13(<4 x i32> %a) { +; P9BE-LABEL: sint_to_fp_widen13: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9BE-NEXT: xvcvsxwdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: sint_to_fp_widen13: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xvcvsxwdp v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: sint_to_fp_widen13: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8BE-NEXT: xvcvsxwdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: sint_to_fp_widen13: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xvcvsxwdp v2, v2 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = sitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 3 + %conv2 = sitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @uint_to_fp_widen02(<4 x i32> %a) { +; P9BE-LABEL: 
uint_to_fp_widen02: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xvcvuxwdp v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: uint_to_fp_widen02: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9LE-NEXT: xvcvuxwdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: uint_to_fp_widen02: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xvcvuxwdp v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: uint_to_fp_widen02: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8LE-NEXT: xvcvuxwdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = uitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @uint_to_fp_widen13(<4 x i32> %a) { +; P9BE-LABEL: uint_to_fp_widen13: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9BE-NEXT: xvcvuxwdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: uint_to_fp_widen13: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xvcvuxwdp v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: uint_to_fp_widen13: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8BE-NEXT: xvcvuxwdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: uint_to_fp_widen13: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xvcvuxwdp v2, v2 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = uitofp i32 %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 3 + %conv2 = uitofp i32 %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend01(<4 x float> %a) { +; P9BE-LABEL: fp_extend01: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxmrghw vs0, v2, v2 +; P9BE-NEXT: xvcvspdp v2, 
vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend01: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrglw vs0, v2, v2 +; P9LE-NEXT: xvcvspdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend01: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrghw vs0, v2, v2 +; P8BE-NEXT: xvcvspdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend01: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrglw vs0, v2, v2 +; P8LE-NEXT: xvcvspdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 0 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend10(<4 x float> %a) { +; P9BE-LABEL: fp_extend10: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxmrghw vs0, v2, v2 +; P9BE-NEXT: xvcvspdp vs0, vs0 +; P9BE-NEXT: xxswapd v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend10: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrglw vs0, v2, v2 +; P9LE-NEXT: xvcvspdp vs0, vs0 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend10: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrghw vs0, v2, v2 +; P8BE-NEXT: xvcvspdp vs0, vs0 +; P8BE-NEXT: xxswapd v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend10: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrglw vs0, v2, v2 +; P8LE-NEXT: xvcvspdp vs0, vs0 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 1 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 0 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend02(<4 x float> %a) { +; P9BE-LABEL: 
fp_extend02: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xvcvspdp v2, v2 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend02: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9LE-NEXT: xvcvspdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend02: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xvcvspdp v2, v2 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend02: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8LE-NEXT: xvcvspdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 0 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 2 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend13(<4 x float> %a) { +; P9BE-LABEL: fp_extend13: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P9BE-NEXT: xvcvspdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend13: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xvcvspdp v2, v2 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend13: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxsldwi vs0, v2, v2, 1 +; P8BE-NEXT: xvcvspdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend13: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xvcvspdp v2, v2 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 1 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 3 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend23(<4 x float> %a) { +; P9BE-LABEL: fp_extend23: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxmrglw vs0, v2, v2 +; P9BE-NEXT: xvcvspdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend23: +; 
P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrghw vs0, v2, v2 +; P9LE-NEXT: xvcvspdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend23: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrglw vs0, v2, v2 +; P8BE-NEXT: xvcvspdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend23: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrghw vs0, v2, v2 +; P8LE-NEXT: xvcvspdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 2 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 3 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend32(<4 x float> %a) { +; P9BE-LABEL: fp_extend32: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxmrglw vs0, v2, v2 +; P9BE-NEXT: xvcvspdp vs0, vs0 +; P9BE-NEXT: xxswapd v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend32: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrghw vs0, v2, v2 +; P9LE-NEXT: xvcvspdp vs0, vs0 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend32: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrglw vs0, v2, v2 +; P8BE-NEXT: xvcvspdp vs0, vs0 +; P8BE-NEXT: xxswapd v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend32: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrghw vs0, v2, v2 +; P8LE-NEXT: xvcvspdp vs0, vs0 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 3 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 2 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend_two00(<4 x float> %a, <4 x float> %b) { +; P9BE-LABEL: fp_extend_two00: +; P9BE: # %bb.0: # 
%entry +; P9BE-NEXT: xxmrghd vs0, v2, v3 +; P9BE-NEXT: xvcvspdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend_two00: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrgld vs0, v3, v2 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; P9LE-NEXT: xvcvspdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend_two00: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrghd vs0, v2, v3 +; P8BE-NEXT: xvcvspdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend_two00: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrgld vs0, v3, v2 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; P8LE-NEXT: xvcvspdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 0 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %b, i32 0 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} + +define dso_local <2 x double> @fp_extend_two33(<4 x float> %a, <4 x float> %b) { +; P9BE-LABEL: fp_extend_two33: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: xxmrgld vs0, v2, v3 +; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; P9BE-NEXT: xvcvspdp v2, vs0 +; P9BE-NEXT: blr +; +; P9LE-LABEL: fp_extend_two33: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: xxmrghd vs0, v3, v2 +; P9LE-NEXT: xvcvspdp v2, vs0 +; P9LE-NEXT: blr +; +; P8BE-LABEL: fp_extend_two33: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: xxmrgld vs0, v2, v3 +; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; P8BE-NEXT: xvcvspdp v2, vs0 +; P8BE-NEXT: blr +; +; P8LE-LABEL: fp_extend_two33: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: xxmrghd vs0, v3, v2 +; P8LE-NEXT: xvcvspdp v2, vs0 +; P8LE-NEXT: blr +entry: + %vecext = extractelement <4 x float> %a, i32 3 + %conv = fpext float %vecext to double + %vecinit = insertelement <2 x double> undef, double %conv, i32 0 + %vecext1 = extractelement <4 x float> %b, i32 3 + %conv2 = fpext float %vecext1 to double + %vecinit3 = insertelement <2 x double> 
%vecinit, double %conv2, i32 1 + ret <2 x double> %vecinit3 +} diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll --- a/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll +++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization02.ll @@ -47,33 +47,23 @@ ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv vs0, 0(r3) -; CHECK-NEXT: xxsldwi vs1, vs0, vs0, 1 -; CHECK-NEXT: xscvspdpn f2, vs0 -; CHECK-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: xscvspdpn f1, vs1 -; CHECK-NEXT: xscvspdpn f3, vs3 -; CHECK-NEXT: xscvspdpn f0, vs0 -; CHECK-NEXT: xxmrghd vs0, vs0, vs3 -; CHECK-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-NEXT: stxv vs0, 0(r4) -; CHECK-NEXT: stxv vs1, 0(r5) +; CHECK-NEXT: xxmrglw vs1, vs0, vs0 +; CHECK-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-NEXT: xvcvspdp vs1, vs1 +; CHECK-NEXT: xvcvspdp vs0, vs0 +; CHECK-NEXT: stxv vs1, 0(r4) +; CHECK-NEXT: stxv vs0, 0(r5) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: xscvspdpn f3, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xxmrghd vs0, vs3, vs0 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2 -; CHECK-BE-NEXT: stxv vs0, 0(r4) -; CHECK-BE-NEXT: stxv vs1, 0(r5) +; CHECK-BE-NEXT: xxmrghw vs1, vs0, vs0 +; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 +; CHECK-BE-NEXT: stxv vs1, 0(r4) +; CHECK-BE-NEXT: stxv vs0, 0(r5) ; CHECK-BE-NEXT: blr entry: %0 = load <16 x float>, <16 x float>* %a, align 16 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll +++ 
b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll @@ -14,10 +14,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: blr ; @@ -25,20 +23,16 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd f0, r3 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds v2, vs0 ; CHECK-BE-NEXT: blr entry: @@ -50,16 +44,11 @@ define void @test4elt(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: 
xvcvdpuxds v3, vs1 ; CHECK-P8-NEXT: xxswapd vs1, v2 @@ -70,36 +59,26 @@ ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xxswapd vs1, v2 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, v2 -; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2 +; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 ; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxmrghw vs0, v2, v2 +; CHECK-BE-NEXT: xxmrglw vs1, v2, v2 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr entry: %0 = fptoui <4 x float> %a to <4 x i64> @@ -115,31 +94,21 @@ ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v2, r4, r5 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: 
xscvspdpn f4, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 -; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5 +; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 +; CHECK-P8-NEXT: xxmrghw vs3, v3, v3 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 +; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 -; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7 ; CHECK-P8-NEXT: xvcvdpuxds v3, vs1 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs3 +; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs2, v5 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 @@ -149,65 +118,45 @@ ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs0, 0(r4) -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0 +; CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1 +; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1 +; CHECK-P9-NEXT: xxmrglw vs3, vs0, vs0 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs2, vs2 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 +; CHECK-P9-NEXT: xvcvspdp vs3, vs3 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: 
xvcvdpuxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 -; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2 -; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: stxv vs0, 48(r3) ; CHECK-P9-NEXT: stxv vs3, 32(r3) -; CHECK-P9-NEXT: stxv vs2, 48(r3) -; CHECK-P9-NEXT: stxv vs1, 0(r3) +; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: lxv vs0, 16(r4) -; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3 -; CHECK-BE-NEXT: xscvspdpn f3, vs0 -; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4 -; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4 +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1 +; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1 +; CHECK-BE-NEXT: xxmrghw vs3, vs0, vs0 +; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs2, vs2 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 +; CHECK-BE-NEXT: xvcvspdp vs3, vs3 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds 
vs2, vs2 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 -; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: stxv vs3, 32(r3) ; CHECK-BE-NEXT: stxv vs0, 48(r3) +; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: blr entry: @@ -220,70 +169,50 @@ define void @test16elt(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v4, 0, r4 ; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: lvx v5, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r7 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: lvx v4, r4, r7 +; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: xxmrghw vs3, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs5, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs11, v3 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xxswapd vs8, v2 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxswapd v2, v4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 -; CHECK-P8-NEXT: xxmrghd 
vs1, vs3, vs1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xscvspdpn f9, v3 -; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10 -; CHECK-P8-NEXT: xvcvdpuxds v3, vs4 -; CHECK-P8-NEXT: xvcvdpuxds v2, vs1 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12 -; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13 +; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 +; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 +; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xxmrglw vs6, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs7, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs5, vs5 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 +; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs4, vs4 +; CHECK-P8-NEXT: xvcvspdp vs6, vs6 +; CHECK-P8-NEXT: xvcvspdp vs7, vs7 +; CHECK-P8-NEXT: xvcvdpuxds v3, vs3 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs5 -; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9 -; CHECK-P8-NEXT: xvcvdpuxds v0, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v0, vs4 ; CHECK-P8-NEXT: xvcvdpuxds v1, vs2 -; CHECK-P8-NEXT: xvcvdpuxds v6, vs0 +; CHECK-P8-NEXT: xvcvdpuxds v6, vs6 ; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: xvcvdpuxds v7, vs3 -; CHECK-P8-NEXT: xxswapd vs4, v2 -; CHECK-P8-NEXT: xxswapd vs3, v4 +; CHECK-P8-NEXT: xvcvdpuxds v7, vs7 ; CHECK-P8-NEXT: xxswapd vs1, v5 +; CHECK-P8-NEXT: xxswapd vs4, v2 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 96 +; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs2, v0 -; CHECK-P8-NEXT: xxswapd vs0, v1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xxswapd vs5, v6 ; CHECK-P8-NEXT: li r4, 80 +; CHECK-P8-NEXT: xxswapd vs0, v1 +; CHECK-P8-NEXT: xxswapd vs5, v6 ; CHECK-P8-NEXT: xxswapd 
vs1, v7 ; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r8 @@ -295,122 +224,82 @@ ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 16(r4) -; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3 -; CHECK-P9-NEXT: xxswapd vs6, vs4 -; CHECK-P9-NEXT: lxv vs0, 0(r4) -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: xscvspdpn f5, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs4 -; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1 -; CHECK-P9-NEXT: lxv vs3, 32(r4) -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0 -; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: lxv vs3, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 32(r4) +; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1 +; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1 +; CHECK-P9-NEXT: xxmrglw vs4, vs3, vs3 +; CHECK-P9-NEXT: xxmrghw vs3, vs3, vs3 +; CHECK-P9-NEXT: xxmrglw vs6, vs5, vs5 +; CHECK-P9-NEXT: xxmrghw vs5, vs5, vs5 +; CHECK-P9-NEXT: xxmrglw vs7, vs0, vs0 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs2, vs2 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 +; CHECK-P9-NEXT: xvcvspdp vs4, vs4 +; CHECK-P9-NEXT: xvcvspdp vs3, vs3 +; CHECK-P9-NEXT: xvcvspdp vs6, vs6 +; CHECK-P9-NEXT: xvcvspdp vs5, vs5 +; CHECK-P9-NEXT: xvcvspdp vs7, vs7 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxmrghd vs6, vs7, 
vs6 -; CHECK-P9-NEXT: xscvspdpn f7, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: lxv vs2, 48(r4) -; CHECK-P9-NEXT: xxswapd vs8, vs2 -; CHECK-P9-NEXT: xscvspdpn f8, vs8 ; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3 -; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 -; CHECK-P9-NEXT: stxv vs6, 64(r3) -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7 -; CHECK-P9-NEXT: xscvspdpn f8, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5 ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-P9-NEXT: stxv vs3, 80(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: stxv vs0, 112(r3) ; CHECK-P9-NEXT: stxv vs7, 96(r3) -; CHECK-P9-NEXT: stxv vs2, 112(r3) -; CHECK-P9-NEXT: stxv vs4, 48(r3) -; CHECK-P9-NEXT: stxv vs5, 32(r3) -; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs1, 0(r3) +; CHECK-P9-NEXT: stxv vs5, 80(r3) +; CHECK-P9-NEXT: stxv vs6, 64(r3) +; CHECK-P9-NEXT: stxv vs3, 48(r3) +; CHECK-P9-NEXT: stxv vs4, 32(r3) +; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: lxv vs4, 16(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1 -; CHECK-BE-NEXT: xscvspdpn f6, vs6 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5 -; CHECK-BE-NEXT: xscvspdpn f5, vs4 -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: xxsldwi vs7, 
vs3, vs3, 1 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6 -; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvspdpn f6, vs6 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2 -; CHECK-BE-NEXT: lxv vs2, 48(r4) -; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1 +; CHECK-BE-NEXT: lxv vs0, 48(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs3, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 32(r4) +; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1 +; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1 +; CHECK-BE-NEXT: xxmrghw vs4, vs3, vs3 +; CHECK-BE-NEXT: xxmrglw vs3, vs3, vs3 +; CHECK-BE-NEXT: xxmrghw vs6, vs5, vs5 +; CHECK-BE-NEXT: xxmrglw vs5, vs5, vs5 +; CHECK-BE-NEXT: xxmrghw vs7, vs0, vs0 +; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs2, vs2 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 +; CHECK-BE-NEXT: xvcvspdp vs4, vs4 +; CHECK-BE-NEXT: xvcvspdp vs3, vs3 +; CHECK-BE-NEXT: xvcvspdp vs6, vs6 +; CHECK-BE-NEXT: xvcvspdp vs5, vs5 +; CHECK-BE-NEXT: xvcvspdp vs7, vs7 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 +; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 -; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6 -; CHECK-BE-NEXT: xscvspdpn f6, vs3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7 -; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7 -; CHECK-BE-NEXT: xscvspdpn f7, vs2 -; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8 -; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8 -; CHECK-BE-NEXT: stxv vs5, 32(r3) ; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4 
-; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5 ; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-BE-NEXT: stxv vs3, 80(r3) +; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-BE-NEXT: stxv vs0, 112(r3) ; CHECK-BE-NEXT: stxv vs7, 96(r3) -; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 -; CHECK-BE-NEXT: stxv vs2, 112(r3) +; CHECK-BE-NEXT: stxv vs5, 80(r3) ; CHECK-BE-NEXT: stxv vs6, 64(r3) -; CHECK-BE-NEXT: stxv vs4, 48(r3) -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs4, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -424,10 +313,8 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: blr ; @@ -435,20 +322,16 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd f0, r3 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds v2, vs0 ; CHECK-BE-NEXT: blr entry: @@ -460,16 +343,11 @@ define void 
@test4elt_signed(<4 x i64>* noalias nocapture sret %agg.result, <4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v3, vs1 ; CHECK-P8-NEXT: xxswapd vs1, v2 @@ -480,36 +358,26 @@ ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P9-NEXT: xxswapd vs1, v2 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, v2, v2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, v2 -; CHECK-P9-NEXT: xxmrghd vs1, vs1, vs2 +; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 ; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: xxswapd vs2, v2 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxmrghw vs0, v2, v2 +; CHECK-BE-NEXT: xxmrglw vs1, v2, 
v2 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: xxmrghd vs1, vs2, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr entry: %0 = fptoui <4 x float> %a to <4 x i64> @@ -525,31 +393,21 @@ ; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v2, r4, r5 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvspdpn f6, vs6 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs3 -; CHECK-P8-NEXT: xxmrghd vs2, vs6, vs5 +; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 +; CHECK-P8-NEXT: xxmrghw vs3, v3, v3 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 +; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 -; CHECK-P8-NEXT: xxmrghd vs3, vs4, vs7 ; CHECK-P8-NEXT: xvcvdpuxds v3, vs1 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs2 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs3 +; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs1, v2 ; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs2, v5 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r6 @@ -559,65 +417,45 @@ ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs0, 0(r4) -; 
CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: xxmrghd vs0, vs3, vs0 +; CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1 +; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1 +; CHECK-P9-NEXT: xxmrglw vs3, vs0, vs0 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs2, vs2 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 +; CHECK-P9-NEXT: xvcvspdp vs3, vs3 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: xxmrghd vs3, vs4, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 -; CHECK-P9-NEXT: xxmrghd vs2, vs4, vs2 -; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: stxv vs0, 48(r3) ; CHECK-P9-NEXT: stxv vs3, 32(r3) -; CHECK-P9-NEXT: stxv vs2, 48(r3) -; CHECK-P9-NEXT: stxv vs1, 0(r3) +; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: lxv vs0, 16(r4) -; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs3 -; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 -; 
CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs3 -; CHECK-BE-NEXT: xscvspdpn f3, vs0 -; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs4 -; CHECK-BE-NEXT: xxsldwi vs4, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs4 +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1 +; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1 +; CHECK-BE-NEXT: xxmrghw vs3, vs0, vs0 +; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs2, vs2 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 +; CHECK-BE-NEXT: xvcvspdp vs3, vs3 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 -; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: stxv vs3, 32(r3) ; CHECK-BE-NEXT: stxv vs0, 48(r3) +; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: blr entry: @@ -630,70 +468,50 @@ define void @test16elt_signed(<16 x i64>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: li r5, 16 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lvx v4, 0, r4 ; CHECK-P8-NEXT: li r8, 64 -; CHECK-P8-NEXT: lvx v5, r4, r5 -; CHECK-P8-NEXT: lvx v3, r4, r7 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: lvx v4, r4, r7 +; CHECK-P8-NEXT: lvx v2, r4, r5 +; CHECK-P8-NEXT: lvx v3, r4, r6 +; CHECK-P8-NEXT: xxmrghw vs3, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs5, v4, v4 +; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs1, v2, v2 +; CHECK-P8-NEXT: lvx v2, 0, r4 ; CHECK-P8-NEXT: li r4, 112 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; CHECK-P8-NEXT: 
xscvspdpn f6, v4 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xxsldwi vs10, v3, v3, 3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs11, v3 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs7, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xxswapd vs8, v2 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxswapd v2, v4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 -; CHECK-P8-NEXT: xxmrghd vs1, vs3, vs1 -; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxmrghd vs4, vs4, vs9 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xxmrghd vs0, vs0, vs5 -; CHECK-P8-NEXT: xscvspdpn f3, v2 -; CHECK-P8-NEXT: xscvspdpn f9, v3 -; CHECK-P8-NEXT: xxmrghd vs5, vs11, vs10 -; CHECK-P8-NEXT: xvcvdpuxds v3, vs4 -; CHECK-P8-NEXT: xvcvdpuxds v2, vs1 -; CHECK-P8-NEXT: xxmrghd vs1, vs2, vs12 -; CHECK-P8-NEXT: xxmrghd vs2, vs8, vs7 -; CHECK-P8-NEXT: xvcvdpuxds v4, vs0 -; CHECK-P8-NEXT: xxmrghd vs0, vs3, vs13 +; CHECK-P8-NEXT: xxmrglw vs2, v3, v3 +; CHECK-P8-NEXT: xxmrghw vs4, v3, v3 +; CHECK-P8-NEXT: xvcvspdp vs3, vs3 +; CHECK-P8-NEXT: xxmrglw vs6, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs7, v2, v2 +; CHECK-P8-NEXT: xvcvspdp vs5, vs5 +; CHECK-P8-NEXT: xvcvspdp vs0, vs0 +; CHECK-P8-NEXT: xvcvspdp vs1, vs1 +; CHECK-P8-NEXT: xvcvspdp vs2, vs2 +; CHECK-P8-NEXT: xvcvspdp vs4, vs4 +; CHECK-P8-NEXT: xvcvspdp vs6, vs6 +; CHECK-P8-NEXT: xvcvspdp vs7, vs7 +; CHECK-P8-NEXT: xvcvdpuxds v3, vs3 ; CHECK-P8-NEXT: xvcvdpuxds v5, vs5 -; CHECK-P8-NEXT: xxmrghd vs3, vs6, vs9 -; CHECK-P8-NEXT: xvcvdpuxds v0, vs1 +; CHECK-P8-NEXT: xvcvdpuxds 
v2, vs0 +; CHECK-P8-NEXT: xvcvdpuxds v4, vs1 +; CHECK-P8-NEXT: xvcvdpuxds v0, vs4 ; CHECK-P8-NEXT: xvcvdpuxds v1, vs2 -; CHECK-P8-NEXT: xvcvdpuxds v6, vs0 +; CHECK-P8-NEXT: xvcvdpuxds v6, vs6 ; CHECK-P8-NEXT: xxswapd vs0, v3 -; CHECK-P8-NEXT: xvcvdpuxds v7, vs3 -; CHECK-P8-NEXT: xxswapd vs4, v2 -; CHECK-P8-NEXT: xxswapd vs3, v4 +; CHECK-P8-NEXT: xvcvdpuxds v7, vs7 ; CHECK-P8-NEXT: xxswapd vs1, v5 +; CHECK-P8-NEXT: xxswapd vs4, v2 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r4 ; CHECK-P8-NEXT: li r4, 96 +; CHECK-P8-NEXT: xxswapd vs3, v4 ; CHECK-P8-NEXT: xxswapd vs2, v0 -; CHECK-P8-NEXT: xxswapd vs0, v1 ; CHECK-P8-NEXT: stxvd2x vs1, r3, r4 -; CHECK-P8-NEXT: xxswapd vs5, v6 ; CHECK-P8-NEXT: li r4, 80 +; CHECK-P8-NEXT: xxswapd vs0, v1 +; CHECK-P8-NEXT: xxswapd vs5, v6 ; CHECK-P8-NEXT: xxswapd vs1, v7 ; CHECK-P8-NEXT: stxvd2x vs2, r3, r4 ; CHECK-P8-NEXT: stxvd2x vs0, r3, r8 @@ -705,122 +523,82 @@ ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 16(r4) -; CHECK-P9-NEXT: xxsldwi vs5, vs4, vs4, 3 -; CHECK-P9-NEXT: xxswapd vs6, vs4 -; CHECK-P9-NEXT: lxv vs0, 0(r4) -; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs2, vs0 -; CHECK-P9-NEXT: xscvspdpn f5, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxmrghd vs5, vs6, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs4 -; CHECK-P9-NEXT: xxsldwi vs4, vs4, vs4, 1 -; CHECK-P9-NEXT: lxv vs3, 32(r4) -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xxmrghd vs1, vs2, vs1 -; CHECK-P9-NEXT: xscvspdpn f2, vs0 -; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xxmrghd vs0, vs2, vs0 -; CHECK-P9-NEXT: xxmrghd vs4, vs6, vs4 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: lxv vs3, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 
32(r4) +; CHECK-P9-NEXT: xxmrglw vs2, vs1, vs1 +; CHECK-P9-NEXT: xxmrghw vs1, vs1, vs1 +; CHECK-P9-NEXT: xxmrglw vs4, vs3, vs3 +; CHECK-P9-NEXT: xxmrghw vs3, vs3, vs3 +; CHECK-P9-NEXT: xxmrglw vs6, vs5, vs5 +; CHECK-P9-NEXT: xxmrghw vs5, vs5, vs5 +; CHECK-P9-NEXT: xxmrglw vs7, vs0, vs0 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-P9-NEXT: xvcvspdp vs2, vs2 +; CHECK-P9-NEXT: xvcvspdp vs1, vs1 +; CHECK-P9-NEXT: xvcvspdp vs4, vs4 +; CHECK-P9-NEXT: xvcvspdp vs3, vs3 +; CHECK-P9-NEXT: xvcvspdp vs6, vs6 +; CHECK-P9-NEXT: xvcvspdp vs5, vs5 +; CHECK-P9-NEXT: xvcvspdp vs7, vs7 +; CHECK-P9-NEXT: xvcvspdp vs0, vs0 +; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 -; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxmrghd vs6, vs7, vs6 -; CHECK-P9-NEXT: xscvspdpn f7, vs3 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: lxv vs2, 48(r4) -; CHECK-P9-NEXT: xxswapd vs8, vs2 -; CHECK-P9-NEXT: xscvspdpn f8, vs8 ; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xxmrghd vs3, vs7, vs3 -; CHECK-P9-NEXT: xxsldwi vs7, vs2, vs2, 3 -; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 -; CHECK-P9-NEXT: stxv vs6, 64(r3) -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xxmrghd vs7, vs8, vs7 -; CHECK-P9-NEXT: xscvspdpn f8, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xxmrghd vs2, vs8, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5 ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-P9-NEXT: stxv vs3, 80(r3) -; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 +; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-P9-NEXT: stxv vs0, 112(r3) ; CHECK-P9-NEXT: stxv vs7, 96(r3) -; CHECK-P9-NEXT: stxv vs2, 112(r3) -; CHECK-P9-NEXT: stxv vs4, 48(r3) -; CHECK-P9-NEXT: stxv vs5, 32(r3) -; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs1, 0(r3) +; 
CHECK-P9-NEXT: stxv vs5, 80(r3) +; CHECK-P9-NEXT: stxv vs6, 64(r3) +; CHECK-P9-NEXT: stxv vs3, 48(r3) +; CHECK-P9-NEXT: stxv vs4, 32(r3) +; CHECK-P9-NEXT: stxv vs1, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: lxv vs4, 16(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvspdpn f5, vs5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 1 -; CHECK-BE-NEXT: xscvspdpn f6, vs6 -; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs5 -; CHECK-BE-NEXT: xscvspdpn f5, vs4 -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 1 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xxmrghd vs5, vs5, vs6 -; CHECK-BE-NEXT: xxsldwi vs6, vs4, vs4, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvspdpn f6, vs6 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xxmrghd vs1, vs1, vs2 -; CHECK-BE-NEXT: lxv vs2, 48(r4) -; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 1 +; CHECK-BE-NEXT: lxv vs0, 48(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs3, 16(r4) +; CHECK-BE-NEXT: lxv vs5, 32(r4) +; CHECK-BE-NEXT: xxmrghw vs2, vs1, vs1 +; CHECK-BE-NEXT: xxmrglw vs1, vs1, vs1 +; CHECK-BE-NEXT: xxmrghw vs4, vs3, vs3 +; CHECK-BE-NEXT: xxmrglw vs3, vs3, vs3 +; CHECK-BE-NEXT: xxmrghw vs6, vs5, vs5 +; CHECK-BE-NEXT: xxmrglw vs5, vs5, vs5 +; CHECK-BE-NEXT: xxmrghw vs7, vs0, vs0 +; CHECK-BE-NEXT: xxmrglw vs0, vs0, vs0 +; CHECK-BE-NEXT: xvcvspdp vs2, vs2 +; CHECK-BE-NEXT: xvcvspdp vs1, vs1 +; CHECK-BE-NEXT: xvcvspdp vs4, vs4 +; CHECK-BE-NEXT: xvcvspdp vs3, vs3 +; CHECK-BE-NEXT: xvcvspdp vs6, vs6 +; CHECK-BE-NEXT: xvcvspdp vs5, vs5 +; CHECK-BE-NEXT: xvcvspdp vs7, vs7 +; CHECK-BE-NEXT: xvcvspdp vs0, vs0 +; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-BE-NEXT: 
xvcvdpuxds vs1, vs1 -; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 -; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 -; CHECK-BE-NEXT: xxmrghd vs4, vs4, vs6 -; CHECK-BE-NEXT: xscvspdpn f6, vs3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: xxmrghd vs6, vs6, vs7 -; CHECK-BE-NEXT: xxsldwi vs7, vs3, vs3, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvspdpn f7, vs7 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xxmrghd vs3, vs3, vs7 -; CHECK-BE-NEXT: xscvspdpn f7, vs2 -; CHECK-BE-NEXT: xxmrghd vs7, vs7, vs8 -; CHECK-BE-NEXT: xxsldwi vs8, vs2, vs2, 3 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xscvspdpn f8, vs8 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xxmrghd vs2, vs2, vs8 -; CHECK-BE-NEXT: stxv vs5, 32(r3) ; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4 -; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6 ; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6 +; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5 ; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7 -; CHECK-BE-NEXT: stxv vs3, 80(r3) +; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 +; CHECK-BE-NEXT: stxv vs0, 112(r3) ; CHECK-BE-NEXT: stxv vs7, 96(r3) -; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 -; CHECK-BE-NEXT: stxv vs2, 112(r3) +; CHECK-BE-NEXT: stxv vs5, 80(r3) ; CHECK-BE-NEXT: stxv vs6, 64(r3) -; CHECK-BE-NEXT: stxv vs4, 48(r3) -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs4, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -1554,11 +1554,8 @@ ; ; CHECK-LE-LABEL: test46: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-LE-NEXT: xxswapd vs1, v2 -; CHECK-LE-NEXT: xscvspdpn f0, vs0 -; CHECK-LE-NEXT: xscvspdpn f1, vs1 -; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0 +; 
CHECK-LE-NEXT: xxmrglw vs0, v2, v2 +; CHECK-LE-NEXT: xvcvspdp vs0, vs0 ; CHECK-LE-NEXT: xvcvdpuxds v2, vs0 ; CHECK-LE-NEXT: blr %v = fptoui <2 x float> %a to <2 x i64> @@ -1625,11 +1622,8 @@ ; ; CHECK-LE-LABEL: test47: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-LE-NEXT: xxswapd vs1, v2 -; CHECK-LE-NEXT: xscvspdpn f0, vs0 -; CHECK-LE-NEXT: xscvspdpn f1, vs1 -; CHECK-LE-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-LE-NEXT: xxmrglw vs0, v2, v2 +; CHECK-LE-NEXT: xvcvspdp vs0, vs0 ; CHECK-LE-NEXT: xvcvdpsxds v2, vs0 ; CHECK-LE-NEXT: blr %v = fptosi <2 x float> %a to <2 x i64>