Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines | |||||
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,5,6] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,5,6] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7] | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
▲ Show 20 Lines • Show All 564 Lines • ▼ Show 20 Lines | |||||
define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,2,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0,0,0,0,4,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0,0,0,5,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0,0,6,0,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> | %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> | ||||
ret <16 x i16> %shuffle | ret <16 x i16> %shuffle | ||||
} | } | ||||
define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { | ||||
; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,0,0,0,4,5,6,7] | ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX512VL-SLOW: # %bb.0: | ; AVX512VL-SLOW: # %bb.0: | ||||
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3] | ; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,0,0,0,4,5,6,7] | |||||
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] | |||||
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] | ||||
; AVX512VL-SLOW-NEXT: retq | ; AVX512VL-SLOW-NEXT: retq | ||||
; | ; | ||||
; AVX512VL-FAST-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ; AVX512VL-FAST-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: | ||||
; AVX512VL-FAST: # %bb.0: | ; AVX512VL-FAST: # %bb.0: | ||||
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] | ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] | ||||
; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ; AVX512VL-FAST-NEXT: vpermw %ymm0, %ymm1, %ymm0 | ||||
; AVX512VL-FAST-NEXT: retq | ; AVX512VL-FAST-NEXT: retq | ||||
▲ Show 20 Lines • Show All 3,643 Lines • ▼ Show 20 Lines | |||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 | ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 | ||||
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 | ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 | ||||
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 | ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 | ||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 | ||||
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 | ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-SLOW-LABEL: PR34369: | ; AVX2-LABEL: PR34369: | ||||
; AVX2-SLOW: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] | ; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] | ||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,1] | ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5] | ||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7] | ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25] | ||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,6] | ; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15] | ||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25] | ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] | ||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15] | ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 | ||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] | ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 | ||||
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2 | ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 | ||||
; AVX2-SLOW-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 | ; AVX2-NEXT: retq | ||||
; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0 | |||||
; AVX2-SLOW-NEXT: retq | |||||
; | |||||
; AVX2-FAST-LABEL: PR34369: | |||||
; AVX2-FAST: # %bb.0: | |||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5] | |||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25] | |||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15] | |||||
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] | |||||
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2 | |||||
; AVX2-FAST-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 | |||||
; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0 | |||||
; AVX2-FAST-NEXT: retq | |||||
; | ; | ||||
; AVX512VL-LABEL: PR34369: | ; AVX512VL-LABEL: PR34369: | ||||
; AVX512VL: # %bb.0: | ; AVX512VL: # %bb.0: | ||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12] | ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12] | ||||
; AVX512VL-NEXT: vptestnmw %ymm1, %ymm1, %k1 | ; AVX512VL-NEXT: vptestnmw %ymm1, %ymm1, %k1 | ||||
; AVX512VL-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} | ; AVX512VL-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} | ||||
; AVX512VL-NEXT: retq | ; AVX512VL-NEXT: retq | ||||
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12> | %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12> | ||||
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines |