Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2550,6 +2550,44 @@
                      UseVSXReg;
   } // mayStore
 
+  let Predicates = [IsLittleEndian] in { // LE: elt i -> splat word 3-i
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+  } // NOTE(review): predicated on endianness only - confirm intended
+
+  let Predicates = [IsBigEndian] in { // BE: elt i -> splat word i
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+  def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+           (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+  def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
+           (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+  } // NOTE(review): predicated on endianness only - confirm intended
+
   // Patterns for which instructions from ISA 3.0 are a better match
   let Predicates = [IsLittleEndian, HasP9Vector] in {
   def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
@@ -2560,6 +2598,14 @@
             (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
   def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
             (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; // LE: imm = 12 - 4*i
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
   def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
             (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
   def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
@@ -2587,6 +2633,14 @@
             (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
   def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
             (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; // BE: imm = 4*i
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+  def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
+            (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
   def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
             (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
   def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
Index: llvm/trunk/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ llvm/trunk/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -439,6 +439,69 @@
   ret float %conv
 }
 
+; Verify that a constant-index extract from <4 x i32> followed by uitofp to
+; double is selected as xxextractuw + xscvuxddp on both LE and BE.
+
+define double @conv2dlbTestui0(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dlbTestui0
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 12
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui0
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xscvuxddp 1, [[CP]]
+  %0 = extractelement <4 x i32> %a, i32 0
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define double @conv2dlbTestui1(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dlbTestui1
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 8
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui1
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 4
+; CHECK-BE: xscvuxddp 1, [[CP]]
+  %0 = extractelement <4 x i32> %a, i32 1
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define double @conv2dlbTestui2(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dlbTestui2
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 4
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui2
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 8
+; CHECK-BE: xscvuxddp 1, [[CP]]
+  %0 = extractelement <4 x i32> %a, i32 2
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+define double @conv2dlbTestui3(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dlbTestui3
+; CHECK: xxextractuw [[SW:[0-9]+]], 34, 0
+; CHECK: xscvuxddp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dlbTestui3
+; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 12
+; CHECK-BE: xscvuxddp 1, [[CP]]
+  %0 = extractelement <4 x i32> %a, i32 3
+  %1 = uitofp i32 %0 to double
+  ret double %1
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %elem
+  %conv = uitofp i32 %vecext to double
+  ret double %conv
+}
+
 define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
 entry:
 ; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
Index: llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll
+++ llvm/trunk/test/CodeGen/PowerPC/remove-redundant-moves.ll
@@ -105,3 +105,131 @@
   %1 = uitofp i64 %0 to float
   ret float %1
 }
+
+define float @conv2fltTesti0(<4 x i32> %a) { ; sitofp of a constant-index element to float
+entry:
+; CHECK-LABEL: conv2fltTesti0
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti0
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sitofp i32 %vecext to float
+  ret float %conv
+}
+
+define float @conv2fltTesti1(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2fltTesti1
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti1
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 1
+  %conv = sitofp i32 %vecext to float
+  ret float %conv
+}
+
+define float @conv2fltTesti2(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2fltTesti2
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti2
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 2
+  %conv = sitofp i32 %vecext to float
+  ret float %conv
+}
+
+define float @conv2fltTesti3(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2fltTesti3
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
+; CHECK: xvcvsxwsp [[SW]], [[SW]]
+; CHECK: xscvspdpn 1, [[SW]]
+; CHECK-BE-LABEL: conv2fltTesti3
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
+; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
+; CHECK-BE: xscvspdpn 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 3
+  %conv = sitofp i32 %vecext to float
+  ret float %conv
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define float @conv2fltTestiVar(<4 x i32> %a, i32 zeroext %elem) {
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %elem
+  %conv = sitofp i32 %vecext to float
+  ret float %conv
+}
+
+define double @conv2dblTesti0(<4 x i32> %a) { ; sitofp of a constant-index element to double
+entry:
+; CHECK-LABEL: conv2dblTesti0
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti0
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sitofp i32 %vecext to double
+  ret double %conv
+}
+
+define double @conv2dblTesti1(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dblTesti1
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti1
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 1
+  %conv = sitofp i32 %vecext to double
+  ret double %conv
+}
+
+define double @conv2dblTesti2(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dblTesti2
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti2
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 2
+  %conv = sitofp i32 %vecext to double
+  ret double %conv
+}
+
+define double @conv2dblTesti3(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: conv2dblTesti3
+; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
+; CHECK: xvcvsxwdp 1, [[SW]]
+; CHECK-BE-LABEL: conv2dblTesti3
+; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
+; CHECK-BE: xvcvsxwdp 1, [[CP]]
+  %vecext = extractelement <4 x i32> %a, i32 3
+  %conv = sitofp i32 %vecext to double
+  ret double %conv
+}
+
+; Verify we don't crash when the extracted element index is a variable.
+define double @conv2dblTestiVar(<4 x i32> %a, i32 zeroext %elem) {
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %elem
+  %conv = sitofp i32 %vecext to double
+  ret double %conv
+}