Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1769,6 +1769,7 @@ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } +def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let AddedComplexity = 400 in { // v4f32 scalar <-> vector conversions (BE) let Predicates = [IsBigEndian, HasP8Vector] in { @@ -1801,6 +1802,17 @@ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + + // v2i64 scalar <-> vector conversions (BE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; +} // IsBigEndian, HasDirectMove + +let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in { def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), @@ -1867,15 +1879,7 @@ (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 VectorExtractions.BE_VARIABLE_WORD)>; - - // v2i64 scalar <-> vector conversions (BE) - def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 VectorExtractions.LE_DWORD_1)>; - def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 VectorExtractions.LE_DWORD_0)>; - def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), - (i64 VectorExtractions.BE_VARIABLE_DWORD)>; -} // IsBigEndian, HasDirectMove +} // IsBigEndian, HasDirectMove, NoP9Vector // v4f32 scalar <-> vector conversions (LE) let Predicates = [IsLittleEndian, HasP8Vector] in { @@ -1931,8 +1935,10 @@ (VEXTUWRX (LI8 0), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), (VEXTUWRX (LI8 4), $S)>; + 
// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), - (VEXTUWRX (LI8 8), $S)>; + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), (VEXTUWRX (LI8 12), $S)>; @@ -1942,11 +1948,82 @@ (EXTSW (VEXTUWRX (LI8 0), $S))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), (EXTSW (VEXTUWRX (LI8 4), $S))>; + // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), - (EXTSW (VEXTUWRX (LI8 8), $S))>; + (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), (EXTSW (VEXTUWRX (LI8 12), $S))>; + + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>; + def : Pat<(i32 
(vector_extract v16i8:$S, 10)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>; + + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUHRX + (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>; + + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUWRX + (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>; + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>; + // For extracting LE word 2, 
MFVSRWZ is better than VEXTUWRX + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>; } + let Predicates = [HasP9Altivec, IsBigEndian] in { def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), (VEXTUBLX $Idx, $S)>; @@ -1974,8 +2051,11 @@ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), (VEXTUWLX (LI8 0), $S)>; + + // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), - (VEXTUWLX (LI8 4), $S)>; + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), (VEXTUWLX (LI8 8), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), @@ -1985,12 +2065,82 @@ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), (EXTSW (VEXTUWLX (LI8 0), $S))>; + // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), - (EXTSW (VEXTUWLX (LI8 4), $S))>; + (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), (EXTSW (VEXTUWLX (LI8 8), $S))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), (EXTSW (VEXTUWLX (LI8 12), $S))>; + + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 
(EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>; + + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUHLX + (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>; 
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>; + + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUWLX + (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>; + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>; + // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>; } let Predicates = [IsLittleEndian, HasDirectMove] in { @@ -2003,6 +2153,16 @@ (v4i32 MovesToVSR.LE_WORD_0)>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 MovesToVSR.LE_DWORD_0)>; + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; +} // IsLittleEndian, HasDirectMove + +let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in { def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), @@ -2069,15 +2229,7 @@ (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 VectorExtractions.LE_VARIABLE_WORD)>; - - // v2i64 scalar <-> vector conversions (LE) - def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 VectorExtractions.LE_DWORD_0)>; - def : Pat<(i64 (vector_extract v2i64:$S, 
1)), - (i64 VectorExtractions.LE_DWORD_1)>; - def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), - (i64 VectorExtractions.LE_VARIABLE_DWORD)>; -} // IsLittleEndian, HasDirectMove +} // IsLittleEndian, HasDirectMove, NoP9Vector let Predicates = [HasDirectMove, HasVSX] in { // bitconvert f32 -> i32 @@ -2997,7 +3149,6 @@ } // Patterns for BUILD_VECTOR nodes. -def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let AddedComplexity = 400 in { let Predicates = [HasVSX] in { Index: test/CodeGen/PowerPC/vec_extract_p9.ll =================================================================== --- test/CodeGen/PowerPC/vec_extract_p9.ll +++ test/CodeGen/PowerPC/vec_extract_p9.ll @@ -152,16 +152,16 @@ define zeroext i32 @test9(<4 x i32> %a) { ; CHECK-LE-LABEL: test9: ; CHECK-LE: # BB#0: # %entry -; CHECK-LE-NEXT: li 3, 4 +; CHECK-LE-NEXT: li 3, 12 ; CHECK-LE-NEXT: vextuwrx 3, 3, 2 ; CHECK-LE-NEXT: blr ; CHECK-BE-LABEL: test9: ; CHECK-BE: # BB#0: # %entry -; CHECK-BE-NEXT: li 3, 4 +; CHECK-BE-NEXT: li 3, 12 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr entry: - %vecext = extractelement <4 x i32> %a, i32 1 + %vecext = extractelement <4 x i32> %a, i32 3 ret i32 %vecext } Index: test/CodeGen/PowerPC/vec_extract_p9_2.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/vec_extract_p9_2.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE + +define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { +; CHECK-LE-LABEL: test_add1: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-LE-NEXT: blr +; 
CHECK-BE-LABEL: test_add1: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv = zext i8 %vecext to i32 + %conv1 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { +; CHECK-LE-LABEL: test_add2: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: vextubrx 3, 5, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: extsb 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test_add2: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: vextublx 3, 5, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: extsb 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv3 = zext i8 %vecext to i32 + %conv14 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv3, %conv14 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { +; CHECK-LE-LABEL: test_add3: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-LE-NEXT: vextuhrx 3, 3, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test_add3: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: vextuhlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + %conv = zext i16 %vecext to i32 + %conv1 = zext i16 %c to i32 + %add = add nuw nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i16 + ret i16 %conv2 +} + +define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { +; CHECK-LE-LABEL: test_add4: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30 +; 
CHECK-LE-NEXT: vextuhrx 3, 3, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: extsh 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test_add4: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30 +; CHECK-BE-NEXT: vextuhlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: extsh 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + %conv5 = zext i16 %vecext to i32 + %conv16 = zext i16 %c to i32 + %add = add nuw nsw i32 %conv5, %conv16 + %conv2 = trunc i32 %add to i16 + ret i16 %conv2 +} + +define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { +; CHECK-LE-LABEL: test_add5: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: vextuwrx 3, 3, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: clrldi 3, 3, 32 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test_add5: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: clrldi 3, 3, 32 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + %add = add i32 %vecext, %c + ret i32 %add +} + +define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { +; CHECK-LE-LABEL: test_add6: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-LE-NEXT: vextuwrx 3, 3, 2 +; CHECK-LE-NEXT: add 3, 3, 6 +; CHECK-LE-NEXT: extsw 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test_add6: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 6 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + %add = add nsw i32 %vecext, %c + ret i32 %add +} + +; When extracting word element 2 on LE, it's better to use mfvsrwz rather than vextuwrx +define zeroext i32 @test7(<4 x i32> %a) { +; CHECK-LE-LABEL: test7: +; CHECK-LE: # BB#0: # 
%entry +; CHECK-LE-NEXT: mfvsrwz 3, 34 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test7: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: li 3, 8 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + ret i32 %vecext +} + +define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) { +; CHECK-LE-LABEL: testadd_7: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: mfvsrwz 3, 34 +; CHECK-LE-NEXT: add 3, 3, 5 +; CHECK-LE-NEXT: clrldi 3, 3, 32 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: testadd_7: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: li 3, 8 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 5 +; CHECK-BE-NEXT: clrldi 3, 3, 32 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %add = add i32 %vecext, %c + ret i32 %add +} + +define signext i32 @test8(<4 x i32> %a) { +; CHECK-LE-LABEL: test8: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: mfvsrwz 3, 34 +; CHECK-LE-NEXT: extsw 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test8: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: li 3, 8 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + ret i32 %vecext +} + +define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) { +; CHECK-LE-LABEL: testadd_8: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: mfvsrwz 3, 34 +; CHECK-LE-NEXT: add 3, 3, 5 +; CHECK-LE-NEXT: extsw 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: testadd_8: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: li 3, 8 +; CHECK-BE-NEXT: vextuwlx 3, 3, 2 +; CHECK-BE-NEXT: add 3, 3, 5 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %add = add nsw i32 %vecext, %c + ret i32 %add +} + +; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx +define signext i32 @test9(<4 x i32> %a) { +; CHECK-LE-LABEL: test9: +; CHECK-LE: # BB#0: # %entry +; 
CHECK-LE-NEXT: li 3, 4 +; CHECK-LE-NEXT: vextuwrx 3, 3, 2 +; CHECK-LE-NEXT: extsw 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: test9: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: mfvsrwz 3, 34 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + ret i32 %vecext +} + +define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) { +; CHECK-LE-LABEL: testadd_9: +; CHECK-LE: # BB#0: # %entry +; CHECK-LE-NEXT: li 3, 4 +; CHECK-LE-NEXT: vextuwrx 3, 3, 2 +; CHECK-LE-NEXT: add 3, 3, 5 +; CHECK-LE-NEXT: extsw 3, 3 +; CHECK-LE-NEXT: blr +; CHECK-BE-LABEL: testadd_9: +; CHECK-BE: # BB#0: # %entry +; CHECK-BE-NEXT: mfvsrwz 3, 34 +; CHECK-BE-NEXT: add 3, 3, 5 +; CHECK-BE-NEXT: extsw 3, 3 +; CHECK-BE-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %add = add nsw i32 %vecext, %c + ret i32 %add +}