@@ -132,6 +132,89 @@ define <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) {
132
132
ret <4 x float > %res2
133
133
}
134
134
135
+ define <4 x float > @insertps_zero_from_v2f64 (<4 x float > %a0 , <2 x double >* %a1 ) nounwind { ; Puts one 32-bit lane of a <2 x double> constant (viewed as <4 x float>) into lane 0 of a shuffle of %a0, while the same constant also feeds an fadd on the vector stored at %a1.
136
+ ; SSE-LABEL: insertps_zero_from_v2f64:
137
+ ; SSE: # BB#0:
138
+ ; SSE-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
139
+ ; SSE-NEXT: movapd (%rdi), %xmm2
140
+ ; SSE-NEXT: addpd %xmm1, %xmm2
141
+ ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
142
+ ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
143
+ ; SSE-NEXT: movapd %xmm2, (%rdi)
144
+ ; SSE-NEXT: movaps %xmm1, %xmm0
145
+ ; SSE-NEXT: retq
146
+ ;
147
+ ; AVX-LABEL: insertps_zero_from_v2f64:
148
+ ; AVX: # BB#0:
149
+ ; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
150
+ ; AVX-NEXT: vaddpd (%rdi), %xmm1, %xmm2
151
+ ; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
152
+ ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
153
+ ; AVX-NEXT: vmovapd %xmm2, (%rdi)
154
+ ; AVX-NEXT: retq
155
+ %1 = load <2 x double >, <2 x double >* %a1 ; read the vector currently stored at %a1
156
+ %2 = bitcast <2 x double > <double 1 .0 , double 2 .0 > to <4 x float > ; view the f64 constant as four 32-bit lanes; bits are reinterpreted, not converted
157
+ %3 = fadd <2 x double > %1 , <double 1 .0 , double 2 .0 > ; second use of the same constant, as an f64 addend
158
+ %4 = shufflevector <4 x float > %a0 , <4 x float > %2 , <4 x i32 > <i32 6 , i32 2 , i32 2 , i32 3 > ; mask 6 = lane 2 of %2 (low 32 bits of double 2.0, which are all zero on little-endian x86); lanes 1-3 = %a0[2], %a0[2], %a0[3]
159
+ store <2 x double > %3 , <2 x double > *%a1 ; write the sum back through %a1
160
+ ret <4 x float > %4
161
+ }
162
+
163
+ define <4 x float > @insertps_zero_from_v2i64 (<4 x float > %a0 , <2 x i64 >* %a1 ) nounwind { ; Puts one 32-bit lane of a <2 x i64> constant (viewed as <4 x float>) into lane 0 of a shuffle of %a0, while the same constant also feeds an integer add on the vector stored at %a1.
164
+ ; SSE-LABEL: insertps_zero_from_v2i64:
165
+ ; SSE: # BB#0:
166
+ ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,18446744073709551614]
167
+ ; SSE-NEXT: movdqa (%rdi), %xmm2
168
+ ; SSE-NEXT: paddq %xmm1, %xmm2
169
+ ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
170
+ ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
171
+ ; SSE-NEXT: movdqa %xmm2, (%rdi)
172
+ ; SSE-NEXT: movaps %xmm1, %xmm0
173
+ ; SSE-NEXT: retq
174
+ ;
175
+ ; AVX-LABEL: insertps_zero_from_v2i64:
176
+ ; AVX: # BB#0:
177
+ ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,18446744073709551614]
178
+ ; AVX-NEXT: vpaddq (%rdi), %xmm1, %xmm2
179
+ ; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
180
+ ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
181
+ ; AVX-NEXT: vmovdqa %xmm2, (%rdi)
182
+ ; AVX-NEXT: retq
183
+ %1 = load <2 x i64 >, <2 x i64 >* %a1 ; read the vector currently stored at %a1
184
+ %2 = bitcast <2 x i64 > <i64 1 , i64 -2 > to <4 x float > ; view the i64 constant as four 32-bit lanes; bits are reinterpreted, not converted
185
+ %3 = add <2 x i64 > %1 , <i64 1 , i64 -2 > ; second use of the same constant, as an i64 addend
186
+ %4 = shufflevector <4 x float > %a0 , <4 x float > %2 , <4 x i32 > <i32 6 , i32 2 , i32 2 , i32 3 > ; mask 6 = lane 2 of %2, the low 32 bits of i64 -2 (0xFFFFFFFE); lanes 1-3 = %a0[2], %a0[2], %a0[3]. NOTE(review) - that lane is not zero despite the function name; lane 1 of %2 (mask 5, high half of i64 1) would be. Confirm the intended index before relying on this as a zero-lane test.
187
+ store <2 x i64 > %3 , <2 x i64 > *%a1 ; write the sum back through %a1
188
+ ret <4 x float > %4
189
+ }
190
+
191
+ define <4 x float > @insertps_zero_from_v8i16 (<4 x float > %a0 , <8 x i16 >* %a1 ) nounwind { ; Puts one 32-bit lane of an <8 x i16> constant (viewed as <4 x float>) into lane 0 of a shuffle of %a0, while the same constant also feeds an integer add on the vector stored at %a1.
192
+ ; SSE-LABEL: insertps_zero_from_v8i16:
193
+ ; SSE: # BB#0:
194
+ ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
195
+ ; SSE-NEXT: movdqa (%rdi), %xmm2
196
+ ; SSE-NEXT: paddw %xmm1, %xmm2
197
+ ; SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
198
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
199
+ ; SSE-NEXT: movdqa %xmm2, (%rdi)
200
+ ; SSE-NEXT: retq
201
+ ;
202
+ ; AVX-LABEL: insertps_zero_from_v8i16:
203
+ ; AVX: # BB#0:
204
+ ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
205
+ ; AVX-NEXT: vpaddw (%rdi), %xmm1, %xmm2
206
+ ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
207
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
208
+ ; AVX-NEXT: vmovdqa %xmm2, (%rdi)
209
+ ; AVX-NEXT: retq
210
+ %1 = load <8 x i16 >, <8 x i16 >* %a1 ; read the vector currently stored at %a1
211
+ %2 = bitcast <8 x i16 > <i16 0 , i16 0 , i16 1 , i16 1 , i16 2 , i16 2 , i16 3 , i16 3 > to <4 x float > ; view the i16 constant as four 32-bit lanes, each built from a pair of adjacent i16 elements
212
+ %3 = add <8 x i16 > %1 , <i16 0 , i16 0 , i16 1 , i16 1 , i16 2 , i16 2 , i16 3 , i16 3 > ; second use of the same constant, as an i16 addend
213
+ %4 = shufflevector <4 x float > %a0 , <4 x float > %2 , <4 x i32 > <i32 4 , i32 2 , i32 2 , i32 3 > ; mask 4 = lane 0 of %2, formed by the first two i16 0 elements, so all 32 bits are zero; lanes 1-3 = %a0[2], %a0[2], %a0[3]
214
+ store <8 x i16 > %3 , <8 x i16 > *%a1 ; write the sum back through %a1
215
+ ret <4 x float > %4
216
+ }
217
+
135
218
define <4 x float > @consecutive_load_insertps_04zz (float * %p ) {
136
219
; SSE-LABEL: consecutive_load_insertps_04zz:
137
220
; SSE: # BB#0:
0 commit comments