PowerPC: select the Power9 VEXTU* instructions for vector_extract.

Adds instruction-selection patterns to PPCInstrVSX.td, guarded by HasP9Altivec
with one set per endianness (the ...RX forms under IsLittleEndian, the ...LX
forms under IsBigEndian), and a new llc/FileCheck test, vec_extract_p9.ll, that
checks the little- and big-endian output for -mcpu=pwr9.
NOTE(review): the original whitespace of this patch was lost; the indentation
below is reconstructed -- confirm against the tree before applying.

Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1901,6 +1901,98 @@
 def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
 def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
 
+// Power9 VEXTU[BHW][LR]X: vector_extract with a variable or constant index.
+let Predicates = [HasP9Altivec, IsLittleEndian] in {
+  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+            (VEXTUBRX $Idx, $S)>;
+
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+            (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+            (VEXTUHRX (LI8 0), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+            (VEXTUHRX (LI8 2), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+            (VEXTUHRX (LI8 4), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+            (VEXTUHRX (LI8 6), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+            (VEXTUHRX (LI8 8), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+            (VEXTUHRX (LI8 10), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+            (VEXTUHRX (LI8 12), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+            (VEXTUHRX (LI8 14), $S)>;
+
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+            (VEXTUWRX (LI8 0), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+            (VEXTUWRX (LI8 4), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+            (VEXTUWRX (LI8 8), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+            (VEXTUWRX (LI8 12), $S)>;
+
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+            (EXTSW (VEXTUWRX (LI8 0), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+            (EXTSW (VEXTUWRX (LI8 4), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+            (EXTSW (VEXTUWRX (LI8 8), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+            (EXTSW (VEXTUWRX (LI8 12), $S))>;
+}
+let Predicates = [HasP9Altivec, IsBigEndian] in {
+  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+            (VEXTUBLX $Idx, $S)>;
+
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+            (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+            (VEXTUHLX (LI8 0), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+            (VEXTUHLX (LI8 2), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+            (VEXTUHLX (LI8 4), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+            (VEXTUHLX (LI8 6), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+            (VEXTUHLX (LI8 8), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+            (VEXTUHLX (LI8 10), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+            (VEXTUHLX (LI8 12), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+            (VEXTUHLX (LI8 14), $S)>;
+
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+            (VEXTUWLX (LI8 0), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+            (VEXTUWLX (LI8 4), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+            (VEXTUWLX (LI8 8), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+            (VEXTUWLX (LI8 12), $S)>;
+
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+            (EXTSW (VEXTUWLX (LI8 0), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+            (EXTSW (VEXTUWLX (LI8 4), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+            (EXTSW (VEXTUWLX (LI8 8), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+            (EXTSW (VEXTUWLX (LI8 12), $S))>;
+}
+
 let Predicates = [IsLittleEndian, HasDirectMove] in {
   // v16i8 scalar <-> vector conversions (LE)
   def : Pat<(v16i8 (scalar_to_vector i32:$A)),
Index: llvm/trunk/test/CodeGen/PowerPC/vec_extract_p9.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ llvm/trunk/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test1:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 3, 56
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test1:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 3, 56
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 %index
+  ret i8 %vecext
+}
+
+define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test2:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: extsb 3, 3
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test2:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: extsb 3, 3
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 %index
+  ret i8 %vecext
+}
+
+define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test3:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: vextuhrx 3, 3, 2
+; CHECK-LE-NEXT: clrldi 3, 3, 48
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test3:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: vextuhlx 3, 3, 2
+; CHECK-BE-NEXT: clrldi 3, 3, 48
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 %index
+  ret i16 %vecext
+}
+
+define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test4:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: vextuhrx 3, 3, 2
+; CHECK-LE-NEXT: extsh 3, 3
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test4:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: vextuhlx 3, 3, 2
+; CHECK-BE-NEXT: extsh 3, 3
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 %index
+  ret i16 %vecext
+}
+
+define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test5:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: vextuwrx 3, 3, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test5:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: vextuwlx 3, 3, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %index
+  ret i32 %vecext
+}
+
+define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
+; CHECK-LE-LABEL: test6:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: vextuwrx 3, 3, 2
+; CHECK-LE-NEXT: extsw 3, 3
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test6:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: vextuwlx 3, 3, 2
+; CHECK-BE-NEXT: extsw 3, 3
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %index
+  ret i32 %vecext
+}
+
+; Constant (immediate) index: the byte offset is loaded with li, no rlwinm.
+define zeroext i8 @test7(<16 x i8> %a) {
+; CHECK-LE-LABEL: test7:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: li 3, 1
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
+; CHECK-LE-NEXT: clrldi 3, 3, 56
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test7:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: li 3, 1
+; CHECK-BE-NEXT: vextublx 3, 3, 2
+; CHECK-BE-NEXT: clrldi 3, 3, 56
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 1
+  ret i8 %vecext
+}
+
+define zeroext i16 @test8(<8 x i16> %a) {
+; CHECK-LE-LABEL: test8:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: li 3, 2
+; CHECK-LE-NEXT: vextuhrx 3, 3, 2
+; CHECK-LE-NEXT: clrldi 3, 3, 48
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test8:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: li 3, 2
+; CHECK-BE-NEXT: vextuhlx 3, 3, 2
+; CHECK-BE-NEXT: clrldi 3, 3, 48
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 1
+  ret i16 %vecext
+}
+
+define zeroext i32 @test9(<4 x i32> %a) {
+; CHECK-LE-LABEL: test9:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: li 3, 4
+; CHECK-LE-NEXT: vextuwrx 3, 3, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: test9:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: li 3, 4
+; CHECK-BE-NEXT: vextuwlx 3, 3, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 1
+  ret i32 %vecext
+}