Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1266,16 +1266,14 @@ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); } -/* Direct moves of various widths from VSR's to GPR's. Each moves the - respective element out of the VSR and ensures that it is lined up - to the right side of the GPR. In addition to the extraction from positions - specified by a constant, a pattern for extracting from a variable position - is provided. This is useful when the element number is not known at - compile time. +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. The numbering for the DAG's is for LE, but when used on BE, the correct LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). 
*/ -def MovesFromVSR { +def VectorExtractions { // Doubleword extraction dag LE_DWORD_0 = (MFVSRD @@ -1371,24 +1369,6 @@ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), sub_32); - /* BE variable byte - The algorithm here is the same as the LE variable byte except: - - The shift in the VMX register is by 0/8 for opposite element numbers so - we simply AND the element number with 0x8 - - The order of elements after the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-7 - */ - dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); - dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); - dag BE_MV_VBYTE = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), - sub_64)); - dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), - sub_32); - dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), - sub_32); - /* LE variable halfword Number 1. above: - For elements 0-3, we shift left by 8 since they're on the right @@ -1421,6 +1401,88 @@ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), sub_32); + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. 
above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. 
+ */ + dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + LE_VDWORD_PERM_VEC); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + /* BE variable halfword The algorithm here is the same as the LE variable halfword except: - The shift in the VMX register is by 0/8 for opposite element numbers so @@ -1434,10 +1496,54 @@ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), sub_64)); - dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 60), + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), sub_32); dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61)); + dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + 
sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60)); + dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61)); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. + */ + dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } // v4f32 scalar <-> vector conversions (BE) @@ -1452,8 +1558,15 @@ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; } // IsBigEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + let Predicates = [IsBigEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (BE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1465,75 +1578,79 @@ def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; 
def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_BYTE)>; + (i32 
VectorExtractions.BE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_HALF)>; + (i32 VectorExtractions.BE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (BE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 
MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; } // IsBigEndian, HasDirectMove // v4f32 scalar <-> vector conversions (LE) @@ -1548,8 +1665,15 @@ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; } // IsLittleEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1561,73 +1685,77 @@ def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 MovesToVSR.LE_DWORD_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 
VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_BYTE)>; + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : 
Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_HALF)>; + (i32 VectorExtractions.LE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (LE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; } // IsLittleEndian, HasDirectMove Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll =================================================================== --- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1036,7 +1036,7 @@ ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], [[SHL]] ; CHECK-DAG: extsh 3, 3 ; CHECK-LE-LABEL: @getvelss @@ -1072,7 +1072,7 @@ ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], 
[[SHL]] ; CHECK-DAG: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getvelus Index: test/CodeGen/PowerPC/variable_elem_vec_extracts.ll =================================================================== --- test/CodeGen/PowerPC/variable_elem_vec_extracts.ll +++ test/CodeGen/PowerPC/variable_elem_vec_extracts.ll @@ -0,0 +1,114 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P7 + +; Function Attrs: norecurse nounwind readnone +define signext i32 @geti(<4 x i32> %a, i32 signext %b) { +entry: + %vecext = extractelement <4 x i32> %a, i32 %b + ret i32 %vecext +; CHECK-LABEL: @geti +; CHECK-P7-LABEL: @geti +; CHECK-BE-LABEL: @geti +; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2 +; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2 +; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] +; CHECK-DAG: li [[ONEREG:[0-9]+]], 1 +; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5 +; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5 +; CHECK: mfvsrd [[TOGPR:[0-9]+]], +; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] +; CHECK: extsw 3, [[RSHREG]] +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2 +; CHECK-P7-DAG: stxvw4x 34, +; CHECK-P7: lwax 3, [[ELEMOFFREG]], +; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 2 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1 +; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5 +; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5 +; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]], +; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]] +; CHECK-BE: extsw 3, [[RSHREG]] +} + +; Function Attrs: norecurse nounwind readnone +define i64 @getl(<2 x i64> %a, i32 signext %b) { +entry: + %vecext = extractelement <2 x i64> %a, i32 %b + ret i64 %vecext +; CHECK-LABEL: @getl +; CHECK-P7-LABEL: @getl +; CHECK-BE-LABEL: @getl +; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1 +; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 +; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]] +; CHECK: mfvsrd 3, +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3 +; CHECK-P7-DAG: stxvd2x 34, +; CHECK-P7: ldx 3, [[ELEMOFFREG]], +; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: mfvsrd 3, +} + +; Function Attrs: norecurse nounwind readnone +define float @getf(<4 x float> %a, i32 signext %b) { +entry: + %vecext = extractelement <4 x float> %a, i32 %b + ret float %vecext +; CHECK-LABEL: @getf +; CHECK-P7-LABEL: @getf +; CHECK-BE-LABEL: @getf +; CHECK: li [[IMMREG:[0-9]+]], 3 +; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK: xscvspdpn 1, +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2 +; CHECK-P7-DAG: stxvw4x 34, +; CHECK-P7: lfsx 1, [[ELEMOFFREG]], +; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2 +; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]] +; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: xscvspdpn 1, +} + +; Function Attrs: norecurse nounwind readnone +define double @getd(<2 x double> %a, i32 signext %b) { +entry: + %vecext = extractelement <2 x double> %a, i32 %b + ret double %vecext +; CHECK-LABEL: @getd +; CHECK-P7-LABEL: @getd +; CHECK-BE-LABEL: @getd +; CHECK: li [[TRUNCREG:[0-9]+]], 1 +; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK: xxlor 1, +; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 +; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-P7: xxlor 1, +; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-BE: xxlor 1, +}