Index: test/CodeGen/X86/shuffle-vector-same-inputs.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/shuffle-vector-same-inputs.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL
+
+define <16 x i8> @foo(<64 x i8> %x) {
+; AVX512F-LABEL: foo:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: foo:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: foo:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BW-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX512BW-NEXT: vmovd %ecx, %xmm1
+; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
+; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: foo:
+; AVX512BWVL: # BB#0:
+; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %ecx
+; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
+; AVX512BWVL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
+; AVX512BWVL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
+; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpextrb $14, %xmm0, %eax
+; AVX512BWVL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+  %res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
+  ret <16 x i8> %res
+}
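Note (not part of the patch): the shufflevector under test takes the same vector %x as both operands, and its mask selects every fourth byte starting at index 1 (1, 5, 9, ..., 57) except for the final lane, which reads byte 62 rather than 61; that is why the last 128-bit lane in the checks above uses vpextrb $14 instead of $13. A minimal C sketch of the byte selection, under the assumption that a scalar reference is helpful; the name foo_ref and the array parameter types are illustrative only:

#include <stdint.h>

/* Scalar reference for @foo: both shuffle operands are the same 64-byte
   vector, so this is a plain byte gather from one source. */
void foo_ref(const uint8_t x[64], uint8_t out[16]) {
  /* Every fourth byte starting at 1, except the last lane reads byte 62. */
  static const int mask[16] = {1,  5,  9,  13, 17, 21, 25, 29,
                               33, 37, 41, 45, 49, 53, 57, 62};
  for (int i = 0; i < 16; ++i)
    out[i] = x[mask[i]];
}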