@@ -1154,10 +1154,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
1154
1154
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1155
1155
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
1156
1156
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
1157
- ; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
1158
- ; X86-SSE-NEXT: psllq $32, %xmm2
1159
- ; X86-SSE-NEXT: paddq %xmm1, %xmm2
1160
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
1157
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1161
1158
; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
1162
1159
; X86-SSE-NEXT: popl %esi
1163
1160
; X86-SSE-NEXT: retl
@@ -1191,10 +1188,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
1191
1188
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1192
1189
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
1193
1190
; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
1194
- ; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
1195
- ; X64-SSE-NEXT: psllq $32, %xmm2
1196
- ; X64-SSE-NEXT: paddq %xmm1, %xmm2
1197
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
1191
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1198
1192
; X64-SSE-NEXT: movq %xmm0, (%rax,%rdx,4)
1199
1193
; X64-SSE-NEXT: retq
1200
1194
;
@@ -1952,15 +1946,7 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
1952
1946
; X86-SSE-NEXT: pxor %xmm1, %xmm1
1953
1947
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1954
1948
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1955
- ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,u,65536,u>
1956
- ; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
1957
- ; X86-SSE-NEXT: movdqa %xmm2, %xmm3
1958
- ; X86-SSE-NEXT: psrlq $32, %xmm3
1959
- ; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
1960
- ; X86-SSE-NEXT: paddq %xmm1, %xmm3
1961
- ; X86-SSE-NEXT: psllq $32, %xmm3
1962
- ; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
1963
- ; X86-SSE-NEXT: paddq %xmm3, %xmm0
1949
+ ; X86-SSE-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
1964
1950
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1965
1951
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
1966
1952
; X86-SSE-NEXT: retl
@@ -1986,13 +1972,10 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
1986
1972
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1987
1973
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
1988
1974
; X64-SSE-NEXT: movl $65536, %ecx # imm = 0x10000
1989
- ; X64-SSE-NEXT: movq %rcx, %xmm2
1990
- ; X64-SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
1991
- ; X64-SSE-NEXT: pmuludq %xmm2, %xmm0
1992
- ; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
1993
- ; X64-SSE-NEXT: psllq $32, %xmm2
1994
- ; X64-SSE-NEXT: paddq %xmm0, %xmm2
1995
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
1975
+ ; X64-SSE-NEXT: movq %rcx, %xmm1
1976
+ ; X64-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
1977
+ ; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
1978
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1996
1979
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
1997
1980
; X64-SSE-NEXT: retq
1998
1981
;
@@ -2037,16 +2020,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
2037
2020
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2038
2021
; X86-SSE-NEXT: psrad $16, %xmm0
2039
2022
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
2040
- ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,u,32768,u>
2041
- ; X86-SSE-NEXT: pxor %xmm2, %xmm2
2042
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
2043
- ; X86-SSE-NEXT: movdqa %xmm1, %xmm3
2044
- ; X86-SSE-NEXT: psrlq $32, %xmm3
2045
- ; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
2046
- ; X86-SSE-NEXT: paddq %xmm2, %xmm3
2047
- ; X86-SSE-NEXT: psllq $32, %xmm3
2048
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
2049
- ; X86-SSE-NEXT: paddq %xmm3, %xmm0
2023
+ ; X86-SSE-NEXT: pmuludq {{\.LCPI.*}}, %xmm0
2050
2024
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2051
2025
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
2052
2026
; X86-SSE-NEXT: retl
@@ -2072,12 +2046,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
2072
2046
; X64-SSE-NEXT: movl $32768, %ecx # imm = 0x8000
2073
2047
; X64-SSE-NEXT: movq %rcx, %xmm1
2074
2048
; X64-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
2075
- ; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
2076
- ; X64-SSE-NEXT: pxor %xmm2, %xmm2
2077
- ; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
2078
- ; X64-SSE-NEXT: psllq $32, %xmm2
2079
- ; X64-SSE-NEXT: paddq %xmm0, %xmm2
2080
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
2049
+ ; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
2050
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
2081
2051
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
2082
2052
; X64-SSE-NEXT: retq
2083
2053
;
0 commit comments