Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -2693,6 +2693,36 @@
   dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
   dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
 }
+
+def ByteToWord {
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+}
+
+def ByteToDWord {
+  dag A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+  dag A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+}
+
+def HWordToWord {
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+}
+
+def HWordToDWord {
+  dag A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+  dag A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+}
+
+def WordToDWord {
+  dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+  dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+}
+
 def FltToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
 }
@@ -2933,4 +2963,19 @@
             (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
                     (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
 }
+  // P9 Altivec instructions that can be used to build vectors.
+  // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+  // with complexities of existing build vector patterns in this file.
+  let Predicates = [HasP9Altivec] in {
+    def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, HWordToWord.A2, HWordToWord.A3)),
+              (v4i32 (VEXTSH2W $A))>;
+    def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, ByteToWord.A2, ByteToWord.A3)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+              (v2i64 (VEXTSB2D $A))>;
+  }
 }
Index: test/CodeGen/PowerPC/vec_int_ext.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/vec_int_ext.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <4 x i32> @vextsb2w(<16 x i8> %a) {
+; PWR9-LABEL: vextsb2w:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsb2w 2, 2
+; PWR9-NEXT: blr
+;
+; PWR8-LABEL: vextsb2w:
+; PWR8: # BB#0: # %entry
+; PWR8-NEXT: xxswapd 0, 34
+; PWR8-NEXT: mfvsrd 3, 34
+; PWR8-NEXT: clrldi 4, 3, 56
+; PWR8-NEXT: rldicl 3, 3, 32, 56
+; PWR8-NEXT: mfvsrd 5, 0
+; PWR8-NEXT: extsb 4, 4
+; PWR8-NEXT: extsb 3, 3
+; PWR8-NEXT: mtvsrwz 0, 4
+; PWR8-NEXT: clrldi 4, 5, 56
+; PWR8-NEXT: rldicl 12, 5, 32, 56
+; PWR8-NEXT: mtvsrwz 1, 3
+; PWR8-NEXT: extsb 4, 4
+; PWR8-NEXT: extsb 3, 12
+; PWR8-NEXT: mtvsrwz 2, 4
+; PWR8-NEXT: mtvsrwz 3, 3
+; PWR8-NEXT: xxmrghd 51, 0, 2
+; PWR8-NEXT: xxmrghd 35, 1, 3
+; PWR8-NEXT: vmrgow 2, 3, 19
+; PWR8-NEXT: blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 4
+  %conv2 = sext i8 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 8
+  %conv5 = sext i8 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 12
+  %conv8 = sext i8 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2d(<16 x i8> %a) {
+; PWR9-LABEL: vextsb2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsb2d 2, 2
+; PWR9-NEXT: blr
+;
+; PWR8-LABEL: vextsb2d:
+; PWR8: # BB#0: # %entry
+; PWR8-NEXT: xxswapd 0, 34
+; PWR8-NEXT: mfvsrd 3, 34
+; PWR8-NEXT: clrldi 3, 3, 56
+; PWR8-NEXT: mfvsrd 4, 0
+; PWR8-NEXT: extsb 3, 3
+; PWR8-NEXT: mtvsrd 0, 3
+; PWR8-NEXT: clrldi 4, 4, 56
+; PWR8-NEXT: extsb 4, 4
+; PWR8-NEXT: mtvsrd 1, 4
+; PWR8-NEXT: xxmrghd 34, 0, 1
+; PWR8-NEXT: blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 8
+  %conv2 = sext i8 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2w(<8 x i16> %a) {
+; PWR9-LABEL: vextsh2w:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsh2w 2, 2
+; PWR9-NEXT: blr
+;
+; PWR8-LABEL: vextsh2w:
+; PWR8: # BB#0: # %entry
+; PWR8-NEXT: xxswapd 0, 34
+; PWR8-NEXT: mfvsrd 3, 34
+; PWR8-NEXT: clrldi 4, 3, 48
+; PWR8-NEXT: rldicl 3, 3, 32, 48
+; PWR8-NEXT: mfvsrd 5, 0
+; PWR8-NEXT: extsh 4, 4
+; PWR8-NEXT: extsh 3, 3
+; PWR8-NEXT: mtvsrwz 0, 4
+; PWR8-NEXT: clrldi 4, 5, 48
+; PWR8-NEXT: rldicl 12, 5, 32, 48
+; PWR8-NEXT: mtvsrwz 1, 3
+; PWR8-NEXT: extsh 4, 4
+; PWR8-NEXT: extsh 3, 12
+; PWR8-NEXT: mtvsrwz 2, 4
+; PWR8-NEXT: mtvsrwz 3, 3
+; PWR8-NEXT: xxmrghd 51, 0, 2
+; PWR8-NEXT: xxmrghd 35, 1, 3
+; PWR8-NEXT: vmrgow 2, 3, 19
+; PWR8-NEXT: blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 2
+  %conv2 = sext i16 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 4
+  %conv5 = sext i16 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 6
+  %conv8 = sext i16 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2d(<8 x i16> %a) {
+; PWR9-LABEL: vextsh2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsh2d 2, 2
+; PWR9-NEXT: blr
+;
+; PWR8-LABEL: vextsh2d:
+; PWR8: # BB#0: # %entry
+; PWR8-NEXT: xxswapd 0, 34
+; PWR8-NEXT: mfvsrd 3, 34
+; PWR8-NEXT: clrldi 3, 3, 48
+; PWR8-NEXT: mfvsrd 4, 0
+; PWR8-NEXT: extsh 3, 3
+; PWR8-NEXT: mtvsrd 0, 3
+; PWR8-NEXT: clrldi 4, 4, 48
+; PWR8-NEXT: extsh 4, 4
+; PWR8-NEXT: mtvsrd 1, 4
+; PWR8-NEXT: xxmrghd 34, 0, 1
+; PWR8-NEXT: blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 4
+  %conv2 = sext i16 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2d(<4 x i32> %a) {
+; PWR9-LABEL: vextsw2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsw2d 2, 2
+; PWR9-NEXT: blr
+;
+; PWR8-LABEL: vextsw2d:
+; PWR8: # BB#0: # %entry
+; PWR8-NEXT: xxswapd 0, 34
+; PWR8-NEXT: mfvsrwz 4, 34
+; PWR8-NEXT: extsw 4, 4
+; PWR8-NEXT: mfvsrwz 3, 0
+; PWR8-NEXT: mtvsrd 1, 4
+; PWR8-NEXT: extsw 3, 3
+; PWR8-NEXT: mtvsrd 0, 3
+; PWR8-NEXT: xxmrghd 34, 1, 0
+; PWR8-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sext i32 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 2
+  %conv2 = sext i32 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
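
Note (editor's addition, not part of the patch): the build_vector of sign-extended element extracts that the new patterns match is the shape clang typically emits for per-lane sign extensions written with the AltiVec/GCC vector extensions. Below is a minimal C sketch under those assumptions; the function name and the use of vector element subscripting are illustrative, but compiled for powerpc64le with a P9-enabled clang (e.g. -mcpu=power9 -maltivec) it should produce IR equivalent to the @vextsb2w test above and therefore select to a single vextsb2w with this patch, instead of the per-element PWR8 sequence.

#include <altivec.h>

/* Illustrative sketch only (not taken from the patch): sign-extend the byte
 * that sits at the bottom of each 32-bit lane (byte indices 0, 4, 8, 12 on
 * little-endian) into a full word, mirroring the IR in @vextsb2w above. */
vector signed int extend_bytes_to_words(vector signed char a) {
  vector signed int r = { a[0], a[4], a[8], a[12] };
  return r;
}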