
Commit f7cb16f

Committed on Mar 16, 2016
[X86][SSE41] Additional tests for extracting zeroable shuffle elements
We can currently only match zeroable vector elements of the same size as the shuffle type. These tests demonstrate the problem; a solution will shortly be added in an updated D14261.

llvm-svn: 263606
1 parent 64d9d7c
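For context, the pattern the new tests exercise is a <4 x float> shuffle whose second operand is a bitcast of a constant vector with a different element width. A lane of that bitcast can be all zero bits (e.g. the low 32 bits of double 2.0, 0x4000000000000000) without any whole element of the original constant being zero, so the current lowering does not treat it as zeroable. Below is a minimal sketch of that pattern, stripped of the loads and stores used in the actual tests; it is not part of this commit and the function name is made up for illustration.

; Illustrative sketch only, not from the test file.
define <4 x float> @zeroable_from_wider_element(<4 x float> %a0) {
  ; Element 2 of %c is the low 32 bits of double 2.0, i.e. all zero bits.
  %c = bitcast <2 x double> <double 1.0, double 2.0> to <4 x float>
  ; Lane 0 of the result is therefore a known zero, even though the zero
  ; lives inside a 64-bit constant element rather than a full 32-bit
  ; element of the shuffle type.
  %r = shufflevector <4 x float> %a0, <4 x float> %c, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
  ret <4 x float> %r
}

Since the test file is insertps-combine.ll, the expected outcome of the updated D14261 is presumably that such shuffles lower to a single insertps with a zero mask instead of the shufps sequences checked below.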

1 file changed: +83, -0 lines
llvm/test/CodeGen/X86/insertps-combine.ll

@@ -132,6 +132,89 @@ define <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) {
   ret <4 x float> %res2
 }

+define <4 x float> @insertps_zero_from_v2f64(<4 x float> %a0, <2 x double>* %a1) nounwind {
+; SSE-LABEL: insertps_zero_from_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
+; SSE-NEXT:    movapd (%rdi), %xmm2
+; SSE-NEXT:    addpd %xmm1, %xmm2
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSE-NEXT:    movapd %xmm2, (%rdi)
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insertps_zero_from_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
+; AVX-NEXT:    vaddpd (%rdi), %xmm1, %xmm2
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
+; AVX-NEXT:    vmovapd %xmm2, (%rdi)
+; AVX-NEXT:    retq
+  %1 = load <2 x double>, <2 x double>* %a1
+  %2 = bitcast <2 x double> <double 1.0, double 2.0> to <4 x float>
+  %3 = fadd <2 x double> %1, <double 1.0, double 2.0>
+  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 6, i32 2, i32 2, i32 3>
+  store <2 x double> %3, <2 x double> *%a1
+  ret <4 x float> %4
+}
+
+define <4 x float> @insertps_zero_from_v2i64(<4 x float> %a0, <2 x i64>* %a1) nounwind {
+; SSE-LABEL: insertps_zero_from_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,18446744073709551614]
+; SSE-NEXT:    movdqa (%rdi), %xmm2
+; SSE-NEXT:    paddq %xmm1, %xmm2
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSE-NEXT:    movdqa %xmm2, (%rdi)
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insertps_zero_from_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,18446744073709551614]
+; AVX-NEXT:    vpaddq (%rdi), %xmm1, %xmm2
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3]
+; AVX-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX-NEXT:    retq
+  %1 = load <2 x i64>, <2 x i64>* %a1
+  %2 = bitcast <2 x i64> <i64 1, i64 -2> to <4 x float>
+  %3 = add <2 x i64> %1, <i64 1, i64 -2>
+  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 6, i32 2, i32 2, i32 3>
+  store <2 x i64> %3, <2 x i64> *%a1
+  ret <4 x float> %4
+}
+
+define <4 x float> @insertps_zero_from_v8i16(<4 x float> %a0, <8 x i16>* %a1) nounwind {
+; SSE-LABEL: insertps_zero_from_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
+; SSE-NEXT:    movdqa (%rdi), %xmm2
+; SSE-NEXT:    paddw %xmm1, %xmm2
+; SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT:    movdqa %xmm2, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insertps_zero_from_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,1,1,2,2,3,3]
+; AVX-NEXT:    vpaddw (%rdi), %xmm1, %xmm2
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX-NEXT:    retq
+  %1 = load <8 x i16>, <8 x i16>* %a1
+  %2 = bitcast <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 2, i16 2, i16 3, i16 3> to <4 x float>
+  %3 = add <8 x i16> %1, <i16 0, i16 0, i16 1, i16 1, i16 2, i16 2, i16 3, i16 3>
+  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 4, i32 2, i32 2, i32 3>
+  store <8 x i16> %3, <8 x i16> *%a1
+  ret <4 x float> %4
+}
+
 define <4 x float> @consecutive_load_insertps_04zz(float* %p) {
 ; SSE-LABEL: consecutive_load_insertps_04zz:
 ; SSE:       # BB#0:
