@@ -2726,40 +2726,22 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2726
2726
define <8 x i16 > @fptosi_8f64_to_8i16 (<8 x double > %a ) {
2727
2727
; SSE-LABEL: fptosi_8f64_to_8i16:
2728
2728
; SSE: # %bb.0:
2729
+ ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2730
+ ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2731
+ ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2729
2732
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2730
2733
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2731
- ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2732
- ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2733
- ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,1,3,4,5,6,7]
2734
- ; SSE-NEXT: cvttpd2dq %xmm3, %xmm0
2735
- ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2736
- ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2737
- ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,2,0]
2738
- ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
2739
- ; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2734
+ ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2735
+ ; SSE-NEXT: packssdw %xmm2, %xmm0
2740
2736
; SSE-NEXT: retq
2741
2737
;
2742
- ; AVX1-LABEL: fptosi_8f64_to_8i16:
2743
- ; AVX1: # %bb.0:
2744
- ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2745
- ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2746
- ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2747
- ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2748
- ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2749
- ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2750
- ; AVX1-NEXT: vzeroupper
2751
- ; AVX1-NEXT: retq
2752
- ;
2753
- ; AVX2-LABEL: fptosi_8f64_to_8i16:
2754
- ; AVX2: # %bb.0:
2755
- ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2756
- ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2757
- ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2758
- ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2759
- ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2760
- ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2761
- ; AVX2-NEXT: vzeroupper
2762
- ; AVX2-NEXT: retq
2738
+ ; VEX-LABEL: fptosi_8f64_to_8i16:
2739
+ ; VEX: # %bb.0:
2740
+ ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2741
+ ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2742
+ ; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2743
+ ; VEX-NEXT: vzeroupper
2744
+ ; VEX-NEXT: retq
2763
2745
;
2764
2746
; AVX512F-LABEL: fptosi_8f64_to_8i16:
2765
2747
; AVX512F: # %bb.0:
@@ -2797,146 +2779,28 @@ define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2797
2779
define <8 x i16 > @fptoui_8f64_to_8i16 (<8 x double > %a ) {
2798
2780
; SSE-LABEL: fptoui_8f64_to_8i16:
2799
2781
; SSE: # %bb.0:
2800
- ; SSE-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
2801
- ; SSE-NEXT: movapd %xmm1, %xmm5
2802
- ; SSE-NEXT: subsd %xmm4, %xmm5
2803
- ; SSE-NEXT: cvttsd2si %xmm5, %rcx
2804
- ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
2805
- ; SSE-NEXT: xorq %rax, %rcx
2806
- ; SSE-NEXT: cvttsd2si %xmm1, %rdx
2807
- ; SSE-NEXT: ucomisd %xmm4, %xmm1
2808
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2809
- ; SSE-NEXT: movq %rdx, %xmm5
2810
- ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
2811
- ; SSE-NEXT: movapd %xmm1, %xmm6
2812
- ; SSE-NEXT: subsd %xmm4, %xmm6
2813
- ; SSE-NEXT: cvttsd2si %xmm6, %rcx
2814
- ; SSE-NEXT: xorq %rax, %rcx
2815
- ; SSE-NEXT: cvttsd2si %xmm1, %rdx
2816
- ; SSE-NEXT: ucomisd %xmm4, %xmm1
2817
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2818
- ; SSE-NEXT: movq %rdx, %xmm1
2819
- ; SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm1[0]
2820
- ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
2821
- ; SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,2,2,3,4,5,6,7]
2822
- ; SSE-NEXT: movapd %xmm0, %xmm1
2823
- ; SSE-NEXT: subsd %xmm4, %xmm1
2824
- ; SSE-NEXT: cvttsd2si %xmm1, %rcx
2825
- ; SSE-NEXT: xorq %rax, %rcx
2826
- ; SSE-NEXT: cvttsd2si %xmm0, %rdx
2827
- ; SSE-NEXT: ucomisd %xmm4, %xmm0
2828
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2829
- ; SSE-NEXT: movq %rdx, %xmm1
2830
- ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2831
- ; SSE-NEXT: movapd %xmm0, %xmm6
2832
- ; SSE-NEXT: subsd %xmm4, %xmm6
2833
- ; SSE-NEXT: cvttsd2si %xmm6, %rcx
2834
- ; SSE-NEXT: xorq %rax, %rcx
2835
- ; SSE-NEXT: cvttsd2si %xmm0, %rdx
2836
- ; SSE-NEXT: ucomisd %xmm4, %xmm0
2837
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2838
- ; SSE-NEXT: movq %rdx, %xmm0
2839
- ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2840
- ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
2841
- ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
2842
- ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
2843
- ; SSE-NEXT: movapd %xmm3, %xmm0
2844
- ; SSE-NEXT: subsd %xmm4, %xmm0
2845
- ; SSE-NEXT: cvttsd2si %xmm0, %rcx
2846
- ; SSE-NEXT: xorq %rax, %rcx
2847
- ; SSE-NEXT: cvttsd2si %xmm3, %rdx
2848
- ; SSE-NEXT: ucomisd %xmm4, %xmm3
2849
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2850
- ; SSE-NEXT: movq %rdx, %xmm0
2851
- ; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
2852
- ; SSE-NEXT: movapd %xmm3, %xmm5
2853
- ; SSE-NEXT: subsd %xmm4, %xmm5
2854
- ; SSE-NEXT: cvttsd2si %xmm5, %rcx
2855
- ; SSE-NEXT: xorq %rax, %rcx
2856
- ; SSE-NEXT: cvttsd2si %xmm3, %rdx
2857
- ; SSE-NEXT: ucomisd %xmm4, %xmm3
2858
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2859
- ; SSE-NEXT: movq %rdx, %xmm3
2860
- ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
2782
+ ; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2783
+ ; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2784
+ ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2785
+ ; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2786
+ ; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2787
+ ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2788
+ ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2789
+ ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2790
+ ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2791
+ ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2792
+ ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2861
2793
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2862
- ; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
2863
- ; SSE-NEXT: movapd %xmm2, %xmm0
2864
- ; SSE-NEXT: subsd %xmm4, %xmm0
2865
- ; SSE-NEXT: cvttsd2si %xmm0, %rcx
2866
- ; SSE-NEXT: xorq %rax, %rcx
2867
- ; SSE-NEXT: cvttsd2si %xmm2, %rdx
2868
- ; SSE-NEXT: ucomisd %xmm4, %xmm2
2869
- ; SSE-NEXT: cmovaeq %rcx, %rdx
2870
- ; SSE-NEXT: movq %rdx, %xmm0
2871
- ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
2872
- ; SSE-NEXT: movapd %xmm2, %xmm5
2873
- ; SSE-NEXT: subsd %xmm4, %xmm5
2874
- ; SSE-NEXT: cvttsd2si %xmm5, %rcx
2875
- ; SSE-NEXT: xorq %rax, %rcx
2876
- ; SSE-NEXT: cvttsd2si %xmm2, %rax
2877
- ; SSE-NEXT: ucomisd %xmm4, %xmm2
2878
- ; SSE-NEXT: cmovaeq %rcx, %rax
2879
- ; SSE-NEXT: movq %rax, %xmm2
2880
2794
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2881
- ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2882
- ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
2883
- ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2884
- ; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2885
2795
; SSE-NEXT: retq
2886
2796
;
2887
- ; AVX1-LABEL: fptoui_8f64_to_8i16:
2888
- ; AVX1: # %bb.0:
2889
- ; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2890
- ; AVX1-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2891
- ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
2892
- ; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2893
- ; AVX1-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2894
- ; AVX1-NEXT: vcvttpd2dq %ymm4, %xmm4
2895
- ; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2896
- ; AVX1-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2897
- ; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2898
- ; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2899
- ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2900
- ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2901
- ; AVX1-NEXT: vcmpltpd %ymm2, %ymm0, %ymm4
2902
- ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
2903
- ; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
2904
- ; AVX1-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2905
- ; AVX1-NEXT: vcvttpd2dq %ymm2, %xmm2
2906
- ; AVX1-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2907
- ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2908
- ; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm2, %xmm0
2909
- ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2910
- ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2911
- ; AVX1-NEXT: vzeroupper
2912
- ; AVX1-NEXT: retq
2913
- ;
2914
- ; AVX2-LABEL: fptoui_8f64_to_8i16:
2915
- ; AVX2: # %bb.0:
2916
- ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2917
- ; AVX2-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2918
- ; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2919
- ; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2920
- ; AVX2-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2921
- ; AVX2-NEXT: vcvttpd2dq %ymm4, %xmm4
2922
- ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2923
- ; AVX2-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2924
- ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2925
- ; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2926
- ; AVX2-NEXT: vcmpltpd %ymm2, %ymm0, %ymm3
2927
- ; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2928
- ; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2929
- ; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2930
- ; AVX2-NEXT: vcvttpd2dq %ymm2, %xmm2
2931
- ; AVX2-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2932
- ; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2933
- ; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
2934
- ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2935
- ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2936
- ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2937
- ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2938
- ; AVX2-NEXT: vzeroupper
2939
- ; AVX2-NEXT: retq
2797
+ ; VEX-LABEL: fptoui_8f64_to_8i16:
2798
+ ; VEX: # %bb.0:
2799
+ ; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2800
+ ; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2801
+ ; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2802
+ ; VEX-NEXT: vzeroupper
2803
+ ; VEX-NEXT: retq
2940
2804
;
2941
2805
; AVX512F-LABEL: fptoui_8f64_to_8i16:
2942
2806
; AVX512F: # %bb.0:
@@ -2975,31 +2839,23 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2975
2839
; SSE-LABEL: fptosi_16f32_to_16i8:
2976
2840
; SSE: # %bb.0:
2977
2841
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2978
- ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
2979
- ; SSE-NEXT: pand %xmm4, %xmm3
2980
2842
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2981
- ; SSE-NEXT: pand %xmm4, %xmm2
2982
- ; SSE-NEXT: packuswb %xmm3, %xmm2
2843
+ ; SSE-NEXT: packssdw %xmm3, %xmm2
2983
2844
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2984
- ; SSE-NEXT: pand %xmm4, %xmm1
2985
2845
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2986
- ; SSE-NEXT: pand %xmm4, %xmm0
2987
- ; SSE-NEXT: packuswb %xmm1, %xmm0
2988
- ; SSE-NEXT: packuswb %xmm2, %xmm0
2846
+ ; SSE-NEXT: packssdw %xmm1, %xmm0
2847
+ ; SSE-NEXT: packsswb %xmm2, %xmm0
2989
2848
; SSE-NEXT: retq
2990
2849
;
2991
2850
; AVX1-LABEL: fptosi_16f32_to_16i8:
2992
2851
; AVX1: # %bb.0:
2993
2852
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
2994
2853
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2995
2854
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2996
- ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2997
- ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
2998
2855
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2999
- ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3000
- ; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
3001
- ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
3002
- ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2856
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2857
+ ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2858
+ ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3003
2859
; AVX1-NEXT: vzeroupper
3004
2860
; AVX1-NEXT: retq
3005
2861
;
@@ -3008,13 +2864,10 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
3008
2864
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
3009
2865
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
3010
2866
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3011
- ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3012
- ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
3013
2867
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
3014
- ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
3015
- ; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
3016
- ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
3017
- ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2868
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2869
+ ; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2870
+ ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3018
2871
; AVX2-NEXT: vzeroupper
3019
2872
; AVX2-NEXT: retq
3020
2873
;
@@ -3032,16 +2885,11 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
3032
2885
; SSE-LABEL: fptoui_16f32_to_16i8:
3033
2886
; SSE: # %bb.0:
3034
2887
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
3035
- ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
3036
- ; SSE-NEXT: pand %xmm4, %xmm3
3037
2888
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
3038
- ; SSE-NEXT: pand %xmm4, %xmm2
3039
- ; SSE-NEXT: packuswb %xmm3, %xmm2
2889
+ ; SSE-NEXT: packssdw %xmm3, %xmm2
3040
2890
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
3041
- ; SSE-NEXT: pand %xmm4, %xmm1
3042
2891
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
3043
- ; SSE-NEXT: pand %xmm4, %xmm0
3044
- ; SSE-NEXT: packuswb %xmm1, %xmm0
2892
+ ; SSE-NEXT: packssdw %xmm1, %xmm0
3045
2893
; SSE-NEXT: packuswb %xmm2, %xmm0
3046
2894
; SSE-NEXT: retq
3047
2895
;
@@ -3050,12 +2898,9 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
3050
2898
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
3051
2899
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3052
2900
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3053
- ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3054
- ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
3055
2901
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
3056
- ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3057
- ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
3058
- ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
2902
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2903
+ ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
3059
2904
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3060
2905
; AVX1-NEXT: vzeroupper
3061
2906
; AVX1-NEXT: retq
@@ -3065,12 +2910,9 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
3065
2910
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
3066
2911
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
3067
2912
; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3068
- ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3069
- ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
3070
2913
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
3071
- ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
3072
- ; AVX2-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
3073
- ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
2914
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2915
+ ; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
3074
2916
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3075
2917
; AVX2-NEXT: vzeroupper
3076
2918
; AVX2-NEXT: retq
0 commit comments