@@ -782,66 +782,7 @@ define <8 x i16> @phaddw_single_source6(<8 x i16> %x) {
782
782
ret <8 x i16 > %shuffle2
783
783
}
784
784
785
- ; PR39921
786
- define i32 @pairwise_reduction_8i32 (<8 x i32 > %rdx ) { ; adds the eight i32 lanes of %rdx via three even/odd pairwise add rounds and returns lane 0
787
- ; SSSE3-SLOW-LABEL: pairwise_reduction_8i32:
788
- ; SSSE3-SLOW: # %bb.0:
789
- ; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm0
790
- ; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
791
- ; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
792
- ; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
793
- ; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
794
- ; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
795
- ; SSSE3-SLOW-NEXT: movd %xmm1, %eax
796
- ; SSSE3-SLOW-NEXT: retq
797
- ;
798
- ; SSSE3-FAST-LABEL: pairwise_reduction_8i32:
799
- ; SSSE3-FAST: # %bb.0:
800
- ; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm0
801
- ; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0
802
- ; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0
803
- ; SSSE3-FAST-NEXT: movd %xmm0, %eax
804
- ; SSSE3-FAST-NEXT: retq
805
- ;
806
- ; AVX-SLOW-LABEL: pairwise_reduction_8i32:
807
- ; AVX-SLOW: # %bb.0:
808
- ; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
809
- ; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
810
- ; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
811
- ; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0
812
- ; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
813
- ; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
814
- ; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
815
- ; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
816
- ; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
817
- ; AVX-SLOW-NEXT: vmovd %xmm0, %eax
818
- ; AVX-SLOW-NEXT: vzeroupper
819
- ; AVX-SLOW-NEXT: retq
820
- ;
821
- ; AVX-FAST-LABEL: pairwise_reduction_8i32:
822
- ; AVX-FAST: # %bb.0:
823
- ; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
824
- ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
825
- ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
826
- ; AVX-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0
827
- ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
828
- ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
829
- ; AVX-FAST-NEXT: vmovd %xmm0, %eax
830
- ; AVX-FAST-NEXT: vzeroupper
831
- ; AVX-FAST-NEXT: retq
832
- %rdx.shuf.0.0 = shufflevector <8 x i32 > %rdx , <8 x i32 > undef , <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 ,i32 undef , i32 undef , i32 undef , i32 undef > ; round 1: even source lanes 0,2,4,6 packed into lanes 0-3, upper lanes undef
833
- %rdx.shuf.0.1 = shufflevector <8 x i32 > %rdx , <8 x i32 > undef , <8 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 ,i32 undef , i32 undef , i32 undef , i32 undef > ; round 1: odd source lanes 1,3,5,7 packed into lanes 0-3
834
- %bin.rdx = add <8 x i32 > %rdx.shuf.0.0 , %rdx.shuf.0.1 ; lanes 0-3 now hold the four adjacent-pair sums
835
- %rdx.shuf.1.0 = shufflevector <8 x i32 > %bin.rdx , <8 x i32 > undef ,<8 x i32 > <i32 0 , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 2: pair sums 0 and 2 into lanes 0-1
836
- %rdx.shuf.1.1 = shufflevector <8 x i32 > %bin.rdx , <8 x i32 > undef ,<8 x i32 > <i32 1 , i32 3 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 2: pair sums 1 and 3 into lanes 0-1
837
- %bin.rdx8 = add <8 x i32 > %rdx.shuf.1.0 , %rdx.shuf.1.1 ; lanes 0-1 each hold the sum of four source lanes
838
- %rdx.shuf.2.0 = shufflevector <8 x i32 > %bin.rdx8 , <8 x i32 > undef ,<8 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 3: lane 0 stays in lane 0
839
- %rdx.shuf.2.1 = shufflevector <8 x i32 > %bin.rdx8 , <8 x i32 > undef ,<8 x i32 > <i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 3: lane 1 moved down to lane 0
840
- %bin.rdx9 = add <8 x i32 > %rdx.shuf.2.0 , %rdx.shuf.2.1 ; lane 0 = sum of all eight source lanes
841
- %r = extractelement <8 x i32 > %bin.rdx9 , i32 0 ; only lane 0 carries a defined result
842
- ret i32 %r
843
- }
844
-
785
+ ; PR39921 + PR39936
845
786
define i32 @PR39936_v8i32 (<8 x i32 >) {
846
787
; SSSE3-SLOW-LABEL: PR39936_v8i32:
847
788
; SSSE3-SLOW: # %bb.0:
@@ -900,57 +841,3 @@ define i32 @PR39936_v8i32(<8 x i32>) {
900
841
ret i32 %10
901
842
}
902
843
903
- define float @PR39936_v8f32 (<8 x float >) { ; adds the eight float lanes of the argument %0 via even/odd pairwise fadd rounds and returns lane 0
904
- ; SSSE3-SLOW-LABEL: PR39936_v8f32:
905
- ; SSSE3-SLOW: # %bb.0:
906
- ; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0
907
- ; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1
908
- ; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
909
- ; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
910
- ; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0
911
- ; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
912
- ; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0
913
- ; SSSE3-SLOW-NEXT: retq
914
- ;
915
- ; SSSE3-FAST-LABEL: PR39936_v8f32:
916
- ; SSSE3-FAST: # %bb.0:
917
- ; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0
918
- ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
919
- ; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
920
- ; SSSE3-FAST-NEXT: retq
921
- ;
922
- ; AVX-SLOW-LABEL: PR39936_v8f32:
923
- ; AVX-SLOW: # %bb.0:
924
- ; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
925
- ; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
926
- ; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
927
- ; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0
928
- ; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
929
- ; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
930
- ; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
931
- ; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
932
- ; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
933
- ; AVX-SLOW-NEXT: vzeroupper
934
- ; AVX-SLOW-NEXT: retq
935
- ;
936
- ; AVX-FAST-LABEL: PR39936_v8f32:
937
- ; AVX-FAST: # %bb.0:
938
- ; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
939
- ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
940
- ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
941
- ; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0
942
- ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
943
- ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
944
- ; AVX-FAST-NEXT: vzeroupper
945
- ; AVX-FAST-NEXT: retq
946
- %2 = shufflevector <8 x float > %0 , <8 x float > undef , <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 , i32 undef , i32 undef , i32 undef , i32 undef > ; round 1: even source lanes 0,2,4,6 packed into lanes 0-3, upper lanes undef
947
- %3 = shufflevector <8 x float > %0 , <8 x float > undef , <8 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 undef , i32 undef , i32 undef , i32 undef > ; round 1: odd source lanes 1,3,5,7 packed into lanes 0-3
948
- %4 = fadd <8 x float > %2 , %3 ; lanes 0-3 now hold the four adjacent-pair sums
949
- %5 = shufflevector <8 x float > %4 , <8 x float > undef , <8 x i32 > <i32 0 , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 2: pair sums 0 and 2 into lanes 0-1
950
- %6 = shufflevector <8 x float > %4 , <8 x float > undef , <8 x i32 > <i32 1 , i32 3 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 2: pair sums 1 and 3 into lanes 0-1
951
- %7 = fadd <8 x float > %5 , %6 ; lanes 0-1 each hold the sum of four source lanes
952
- %8 = shufflevector <8 x float > %7 , <8 x float > undef , <8 x i32 > <i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; round 3: lane 1 moved down to lane 0
953
- %9 = fadd <8 x float > %7 , %8 ; %7 is used directly as the lane-0 operand; lane 0 = ((a0+a1)+(a2+a3)) + ((a4+a5)+(a6+a7))
954
- %10 = extractelement <8 x float > %9 , i32 0 ; only lane 0 carries a defined result
955
- ret float %10
956
- }
0 commit comments