@@ -407,33 +407,19 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
407
407
;
408
408
; AVX512F-LABEL: fptoui_2f64_to_4i32:
409
409
; AVX512F: # BB#0:
410
- ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
411
- ; AVX512F-NEXT: vmovq %rax, %xmm1
412
- ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
413
- ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
414
- ; AVX512F-NEXT: vmovq %rax, %xmm0
415
- ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
416
- ; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
410
+ ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
411
+ ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
417
412
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
418
413
; AVX512F-NEXT: retq
419
414
;
420
415
; AVX512VL-LABEL: fptoui_2f64_to_4i32:
421
416
; AVX512VL: # BB#0:
422
- ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
423
- ; AVX512VL-NEXT: vmovq %rax, %xmm1
424
- ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
425
- ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
426
- ; AVX512VL-NEXT: vmovq %rax, %xmm0
427
- ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
428
- ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
429
- ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
417
+ ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
430
418
; AVX512VL-NEXT: retq
431
419
;
432
420
; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
433
421
; AVX512VLDQ: # BB#0:
434
- ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
435
- ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
436
- ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
422
+ ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
437
423
; AVX512VLDQ-NEXT: retq
438
424
%cvt = fptoui <2 x double > %a to <2 x i32 >
439
425
%ext = shufflevector <2 x i32 > %cvt , <2 x i32 > zeroinitializer , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
@@ -491,30 +477,19 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
491
477
;
492
478
; AVX512F-LABEL: fptoui_2f64_to_2i32:
493
479
; AVX512F: # BB#0:
494
- ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
495
- ; AVX512F-NEXT: vmovq %rax, %xmm1
496
- ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
497
- ; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
498
- ; AVX512F-NEXT: vmovq %rax, %xmm0
499
- ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
500
- ; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
480
+ ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
481
+ ; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
482
+ ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
501
483
; AVX512F-NEXT: retq
502
484
;
503
485
; AVX512VL-LABEL: fptoui_2f64_to_2i32:
504
486
; AVX512VL: # BB#0:
505
- ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
506
- ; AVX512VL-NEXT: vmovq %rax, %xmm1
507
- ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
508
- ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
509
- ; AVX512VL-NEXT: vmovq %rax, %xmm0
510
- ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
511
- ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
487
+ ; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
512
488
; AVX512VL-NEXT: retq
513
489
;
514
490
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
515
491
; AVX512VLDQ: # BB#0:
516
- ; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
517
- ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
492
+ ; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
518
493
; AVX512VLDQ-NEXT: retq
519
494
%cvt = fptoui <2 x double > %a to <2 x i32 >
520
495
%ext = shufflevector <2 x i32 > %cvt , <2 x i32 > undef , <4 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef >
@@ -1250,15 +1225,24 @@ define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1250
1225
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1251
1226
; AVX-NEXT: retq
1252
1227
;
1253
- ; AVX512-LABEL: fptoui_2f32_to_2i32:
1254
- ; AVX512: # BB#0:
1255
- ; AVX512-NEXT: vcvttss2usi %xmm0, %rax
1256
- ; AVX512-NEXT: vmovq %rax, %xmm1
1257
- ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1258
- ; AVX512-NEXT: vcvttss2usi %xmm0, %rax
1259
- ; AVX512-NEXT: vmovq %rax, %xmm0
1260
- ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1261
- ; AVX512-NEXT: retq
1228
+ ; AVX512F-LABEL: fptoui_2f32_to_2i32:
1229
+ ; AVX512F: # BB#0:
1230
+ ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1231
+ ; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
1232
+ ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1233
+ ; AVX512F-NEXT: retq
1234
+ ;
1235
+ ; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1236
+ ; AVX512VL: # BB#0:
1237
+ ; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
1238
+ ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1239
+ ; AVX512VL-NEXT: retq
1240
+ ;
1241
+ ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1242
+ ; AVX512VLDQ: # BB#0:
1243
+ ; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
1244
+ ; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1245
+ ; AVX512VLDQ-NEXT: retq
1262
1246
%cvt = fptoui <2 x float > %a to <2 x i32 >
1263
1247
ret <2 x i32 > %cvt
1264
1248
}
0 commit comments