@@ -1655,6 +1655,148 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_
   ret <32 x i8> %shuffle
 }
 
+;
+; Shuffle to logical bit shifts
+;
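+; Each test below blends one source with zeroinitializer so that every
+; i16/i32/i64 lane is moved by a whole number of bytes. AVX2 should fold
+; each shuffle into a single vpsll*/vpsrl* on the ymm register; AVX1 has
+; no 256-bit integer shifts and legalizes to split 128-bit
+; vpshufb/vpunpck/vpor sequences instead.
+;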
+
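+; Low byte of each i16 lane zeroed -> expect vpsllw $8 on AVX2.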
+define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw $0, %xmm3, %xmm3 # xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw $8, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
+  ret <32 x i8> %shuffle
+}
+
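+; Low two bytes of each i32 lane zeroed -> expect vpslld $16 on AVX2.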
+define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,0,1,128,128,4,5,128,128,8,9,128,128,12,13]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero
+; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpslld $16, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29>
+  ret <32 x i8> %shuffle
+}
+
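+; Low six bytes of each i64 lane zeroed -> expect vpsllq $48 on AVX2.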
+define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,0,1,128,128,128,128,128,128,8,9]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[0,0,0,0,0,0],zero,zero,xmm3[0,0,0,0,0,0],zero,zero
+; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllq $48, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25>
+  ret <32 x i8> %shuffle
+}
+
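+; Each i16 lane keeps only its high byte, moved down -> expect vpsrlw $8 on AVX2.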
+define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw $0, %xmm3, %xmm3 # xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
+  ret <32 x i8> %shuffle
+}
+
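+; Each i32 lane keeps only its high two bytes, moved down -> expect vpsrld $16 on AVX2.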
+define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2,3,128,128,6,7,128,128,10,11,128,128,14,15,128,128]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0]
+; AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32>
+  ret <32 x i8> %shuffle
+}
+
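+; Each i64 lane keeps only its top byte, moved down -> expect vpsrlq $56 on AVX2.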
+define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <7,128,128,128,15,128,128,128,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm4 = zero,xmm3[0,0,0],zero,xmm3[0,0,0,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vpor %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlq $56, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <32 x i8> %shuffle
+}
+
 define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
 ; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
 ; AVX1:       # BB#0: