@@ -139,14 +139,12 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
139
139
define <8 x double > @shuffle_v8f64_01014545 (<8 x double > %a , <8 x double > %b ) {
140
140
; AVX512F-LABEL: shuffle_v8f64_01014545:
141
141
; AVX512F: # BB#0:
142
- ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5]
143
- ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
142
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
144
143
; AVX512F-NEXT: retq
145
144
;
146
145
; AVX512F-32-LABEL: shuffle_v8f64_01014545:
147
146
; AVX512F-32: # BB#0:
148
- ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,4,0,5,0,4,0,5,0]
149
- ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
147
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
150
148
; AVX512F-32-NEXT: retl
151
149
%shuffle = shufflevector <8 x double > %a , <8 x double > %b , <8 x i32 > <i32 0 , i32 1 , i32 0 , i32 1 , i32 4 , i32 5 , i32 4 , i32 5 >
152
150
ret <8 x double > %shuffle
@@ -1122,18 +1120,16 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
1122
1120
}
1123
1121
1124
1122
; Shuffle of <8 x i64> whose mask <0,1,0,1,4,5,4,5> only uses indices below 8,
; so every result element is taken from %a; %b is passed to shufflevector but
; never selected. The removed (-) expectations were an index-vector load
; (vmovdqa64) followed by vpermq; the added (+) expectations are a single
; vshufi64x2 of zmm0 with itself for both the 64-bit and the 32-bit run.
define <8 x i64 > @shuffle_v8i64_01014545 (<8 x i64 > %a , <8 x i64 > %b ) {
1125
- ;
1126
1123
; AVX512F-LABEL: shuffle_v8i64_01014545:
1127
1124
; AVX512F: # BB#0:
1128
- ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5]
1129
- ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1125
+ ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1130
1126
; AVX512F-NEXT: retq
1131
1127
;
1132
1128
; AVX512F-32-LABEL: shuffle_v8i64_01014545:
1133
1129
; AVX512F-32: # BB#0:
1134
- ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,4,0,5,0,4,0,5,0]
1135
- ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1130
+ ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1136
1131
; AVX512F-32-NEXT: retl
1132
+
1137
1133
%shuffle = shufflevector <8 x i64 > %a , <8 x i64 > %b , <8 x i32 > <i32 0 , i32 1 , i32 0 , i32 1 , i32 4 , i32 5 , i32 4 , i32 5 >
1138
1134
ret <8 x i64 > %shuffle
1139
1135
}
@@ -2010,3 +2006,135 @@ define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
2010
2006
%shuffle = shufflevector <8 x i64 > %a , <8 x i64 > %b , <8 x i32 ><i32 1 , i32 9 , i32 3 , i32 11 , i32 5 , i32 13 , i32 7 , i32 15 >
2011
2007
ret <8 x i64 > %shuffle
2012
2008
}
2009
+
2010
; Two-source shuffle of <8 x double>. Mask <0,1,4,5,10,11,8,9> takes result
; elements 0-3 from %x (elements 0,1,4,5) and result elements 4-7 from %x1
; (indices 10,11,8,9, i.e. %x1 elements 2,3,0,1). Both runs are expected to
; emit exactly one vshuff64x2 followed by the return.
+ define <8 x double > @test_vshuff64x2_512 (<8 x double > %x , <8 x double > %x1 ) nounwind {
2011
+ ; AVX512F-LABEL: test_vshuff64x2_512:
2012
+ ; AVX512F: # BB#0:
2013
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2014
+ ; AVX512F-NEXT: retq
2015
+ ;
2016
+ ; AVX512F-32-LABEL: test_vshuff64x2_512:
2017
+ ; AVX512F-32: # BB#0:
2018
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2019
+ ; AVX512F-32-NEXT: retl
2020
+ %res = shufflevector <8 x double > %x , <8 x double > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2021
+ ret <8 x double > %res
2022
+ }
2023
+
2024
; Zero-masking variant: the <0,1,4,5,10,11,8,9> shuffle of %x and %x1 is fed to
; a select on %mask whose false operand is zeroinitializer. The expected code
; first turns the <8 x i1> argument (arriving in xmm2) into %k1 via
; vpmovsxwq / vpandq / vptestmq, then emits the vshuff64x2.
; NOTE(review): the shuffle check line is textually the same as in the
; unmasked test_vshuff64x2_512; any {%k1} {z} operand is only covered by the
; {{.*#+}} wildcard, so zero-masking does not appear to be pinned here - confirm.
+ define <8 x double > @test_vshuff64x2_512_maskz (<8 x double > %x , <8 x double > %x1 , <8 x i1 > %mask ) nounwind {
2025
+ ; AVX512F-LABEL: test_vshuff64x2_512_maskz:
2026
+ ; AVX512F: # BB#0:
2027
+ ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
2028
+ ; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
2029
+ ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2030
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2031
+ ; AVX512F-NEXT: retq
2032
+ ;
2033
+ ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
2034
+ ; AVX512F-32: # BB#0:
2035
+ ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
2036
+ ; AVX512F-32-NEXT: vpandq .LCPI118_0, %zmm2, %zmm2
2037
+ ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2038
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2039
+ ; AVX512F-32-NEXT: retl
2040
+ %y = shufflevector <8 x double > %x , <8 x double > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2041
+ %res = select <8 x i1 > %mask , <8 x double > %y , <8 x double > zeroinitializer
2042
+ ret <8 x double > %res
2043
+ }
2044
+
2045
; Merge-masking variant on <8 x i64>: the <0,1,4,5,10,11,8,9> shuffle of %x and
; %x1 is selected per element by %mask, with %x itself as the false operand, so
; unselected elements keep the value from %x. The expected code builds %k1 from
; the mask in xmm2 (vpmovsxwq / vpandq / vptestmq) and then emits vshufi64x2.
; NOTE(review): the shuffle check line does not spell out a {%k1} operand; it
; would only be matched by the {{.*#+}} wildcard - confirm masking is verified.
+ define <8 x i64 > @test_vshufi64x2_512_mask (<8 x i64 > %x , <8 x i64 > %x1 , <8 x i1 > %mask ) nounwind {
2046
+ ; AVX512F-LABEL: test_vshufi64x2_512_mask:
2047
+ ; AVX512F: # BB#0:
2048
+ ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
2049
+ ; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
2050
+ ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2051
+ ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2052
+ ; AVX512F-NEXT: retq
2053
+ ;
2054
+ ; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
2055
+ ; AVX512F-32: # BB#0:
2056
+ ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
2057
+ ; AVX512F-32-NEXT: vpandq .LCPI119_0, %zmm2, %zmm2
2058
+ ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2059
+ ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2060
+ ; AVX512F-32-NEXT: retl
2061
+ %y = shufflevector <8 x i64 > %x , <8 x i64 > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2062
+ %res = select <8 x i1 > %mask , <8 x i64 > %y , <8 x i64 > %x
2063
+ ret <8 x i64 > %res
2064
+ }
2065
+
2066
; Memory-operand variant: the second shuffle source is loaded from %ptr with
; "align 1" and shuffled with %x using mask <0,1,4,5,10,11,8,9>. The expected
; vshuff64x2 shows the second source as mem[2,3,0,1], i.e. no separate vector
; load instruction is expected. On the 32-bit run the pointer argument is first
; read from the stack into %eax.
+ define <8 x double > @test_vshuff64x2_512_mem (<8 x double > %x , <8 x double > *%ptr ) nounwind {
2067
+ ; AVX512F-LABEL: test_vshuff64x2_512_mem:
2068
+ ; AVX512F: # BB#0:
2069
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2070
+ ; AVX512F-NEXT: retq
2071
+ ;
2072
+ ; AVX512F-32-LABEL: test_vshuff64x2_512_mem:
2073
+ ; AVX512F-32: # BB#0:
2074
+ ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2075
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2076
+ ; AVX512F-32-NEXT: retl
2077
+ %x1 = load <8 x double >,<8 x double > *%ptr ,align 1
2078
+ %res = shufflevector <8 x double > %x , <8 x double > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2079
+ ret <8 x double > %res
2080
+ }
2081
+
2082
; Memory operand plus merge-masking: %x1 is loaded from %ptr ("align 1"),
; shuffled with %x using mask <0,1,4,5,10,11,8,9>, and the result is selected
; per element by %mask with %x as the false operand. With only one vector
; argument ahead of it, the mask is expected in xmm1 and is converted to %k1
; (vpmovsxwq / vpandq / vptestmq) before the vshuff64x2 with a mem source.
; NOTE(review): the shuffle check line carries no explicit {%k1}; only the
; {{.*#+}} wildcard could match it - confirm masking is actually verified.
+ define <8 x double > @test_vshuff64x2_512_mem_mask (<8 x double > %x , <8 x double > *%ptr , <8 x i1 > %mask ) nounwind {
2083
+ ; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
2084
+ ; AVX512F: # BB#0:
2085
+ ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
2086
+ ; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
2087
+ ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2088
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2089
+ ; AVX512F-NEXT: retq
2090
+ ;
2091
+ ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
2092
+ ; AVX512F-32: # BB#0:
2093
+ ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
2094
+ ; AVX512F-32-NEXT: vpandq .LCPI121_0, %zmm1, %zmm1
2095
+ ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
2096
+ ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2097
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2098
+ ; AVX512F-32-NEXT: retl
2099
+ %x1 = load <8 x double >,<8 x double > *%ptr ,align 1
2100
+ %y = shufflevector <8 x double > %x , <8 x double > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2101
+ %res = select <8 x i1 > %mask , <8 x double > %y , <8 x double > %x
2102
+ ret <8 x double > %res
2103
+ }
2104
+
2105
; Memory operand plus zero-masking: %x1 is loaded from %ptr ("align 1"),
; shuffled with %x using mask <0,1,4,5,10,11,8,9>, and the result is selected
; per element by %mask with zeroinitializer as the false operand. The mask in
; xmm1 is converted to %k1 (vpmovsxwq / vpandq / vptestmq) before the
; vshuff64x2 with a mem source.
; NOTE(review): apart from the label and the 32-bit constant-pool symbol, the
; expected instructions are the same as in test_vshuff64x2_512_mem_mask, so
; merge- vs zero-masking is not distinguished by these lines - confirm.
+ define <8 x double > @test_vshuff64x2_512_mem_maskz (<8 x double > %x , <8 x double > *%ptr , <8 x i1 > %mask ) nounwind {
2106
+ ; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
2107
+ ; AVX512F: # BB#0:
2108
+ ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
2109
+ ; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
2110
+ ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2111
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2112
+ ; AVX512F-NEXT: retq
2113
+ ;
2114
+ ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
2115
+ ; AVX512F-32: # BB#0:
2116
+ ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
2117
+ ; AVX512F-32-NEXT: vpandq .LCPI122_0, %zmm1, %zmm1
2118
+ ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
2119
+ ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2120
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2121
+ ; AVX512F-32-NEXT: retl
2122
+ %x1 = load <8 x double >,<8 x double > *%ptr ,align 1
2123
+ %y = shufflevector <8 x double > %x , <8 x double > %x1 , <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 10 , i32 11 , i32 8 , i32 9 >
2124
+ %res = select <8 x i1 > %mask , <8 x double > %y , <8 x double > zeroinitializer
2125
+ ret <8 x double > %res
2126
+ }
2127
+
2128
; Two-source shuffle of <16 x float>. The mask keeps %x elements 0-7 in order,
; then takes indices 20-23 and 16-19, i.e. %x1 elements 4-7 followed by %x1
; elements 0-3. The expected decode comment is written in 64-bit element
; units: zmm0[0,1,2,3] covers floats 0-7 of %x, zmm1[2,3,0,1] covers floats
; 4-7 then 0-3 of %x1.
; NOTE(review): despite the "32x4" in the test name, the expected mnemonic is
; vshuff64x2 on both runs - presumably intentional for this lowering; confirm.
+ define <16 x float > @test_vshuff32x4_512 (<16 x float > %x , <16 x float > %x1 ) nounwind {
2129
+ ; AVX512F-LABEL: test_vshuff32x4_512:
2130
+ ; AVX512F: # BB#0:
2131
+ ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2132
+ ; AVX512F-NEXT: retq
2133
+ ;
2134
+ ; AVX512F-32-LABEL: test_vshuff32x4_512:
2135
+ ; AVX512F-32: # BB#0:
2136
+ ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2137
+ ; AVX512F-32-NEXT: retl
2138
+ %res = shufflevector <16 x float > %x , <16 x float > %x1 , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 20 , i32 21 , i32 22 , i32 23 , i32 16 , i32 17 , i32 18 , i32 19 >
2139
+ ret <16 x float > %res
2140
+ }
0 commit comments