
Commit b356d04
Committed Nov 20, 2018
Parent: a6fb85f

[TargetLowering] Improve SimplifyDemandedVectorElts/SimplifyDemandedBits support

For bitcast nodes from larger element types, add the ability for SimplifyDemandedVectorElts to call SimplifyDemandedBits by merging the demanded elts mask into a demanded bits mask.

I've raised https://bugs.llvm.org/show_bug.cgi?id=39689 to deal with the few places where SimplifyDemandedBits's lack of vector handling is a problem.

Differential Revision: https://reviews.llvm.org/D54679

llvm-svn: 347301

7 files changed: +96 -518 lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+17)

@@ -1460,6 +1460,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
                                      TLO, Depth + 1))
         return true;
 
+      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+      // of the large element.
+      // TODO - bigendian once we have test coverage.
+      if (TLO.DAG.getDataLayout().isLittleEndian()) {
+        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+        for (unsigned i = 0; i != NumElts; ++i)
+          if (DemandedElts[i]) {
+            unsigned Ofs = (i % Scale) * EltSizeInBits;
+            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+          }
+
+        KnownBits Known;
+        if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+          return true;
+      }
+
       // If the src element is zero/undef then all the output elements will be -
       // only demanded elements are guaranteed to be correct.
       for (unsigned i = 0; i != NumSrcElts; ++i) {
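To make the new mask conversion concrete, here is a minimal standalone sketch of the same elts-to-bits merge using plain integers instead of APInt (the function name mergeEltsToBitsMask and the v8i16-from-v2i64 example are illustrative, not part of the commit):

#include <cstdint>
#include <cstdio>

// Sketch of the little-endian elts->bits merge above. Output element i
// (EltSizeInBits bits wide) occupies bit range
// [(i % Scale) * EltSizeInBits, (i % Scale + 1) * EltSizeInBits) of its
// larger source element; OR-ing those ranges over all demanded output
// elements yields one per-source-element demanded-bits mask.
// Assumes EltSizeInBits < 64 so the shift below does not overflow.
uint64_t mergeEltsToBitsMask(uint32_t DemandedElts, unsigned NumElts,
                             unsigned EltSizeInBits, unsigned Scale) {
  uint64_t SrcDemandedBits = 0;
  for (unsigned i = 0; i != NumElts; ++i)
    if (DemandedElts & (1u << i)) {
      unsigned Ofs = (i % Scale) * EltSizeInBits;
      SrcDemandedBits |= ((1ull << EltSizeInBits) - 1) << Ofs;
    }
  return SrcDemandedBits;
}

int main() {
  // v8i16 bitcast from v2i64: EltSizeInBits = 16, Scale = 64 / 16 = 4.
  // Demanding i16 elements 1 and 6 demands bits [16,32) and [32,48) of
  // each demanded i64 source element.
  uint64_t Mask = mergeEltsToBitsMask(/*DemandedElts=*/0b01000010,
                                      /*NumElts=*/8, /*EltSizeInBits=*/16,
                                      /*Scale=*/4);
  printf("SrcDemandedBits = 0x%016llx\n", (unsigned long long)Mask);
  // Prints: SrcDemandedBits = 0x0000ffffffff0000
}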

llvm/test/CodeGen/X86/known-bits-vector.ll (+2 -10)

@@ -158,20 +158,12 @@ define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
 define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
 ; X32:       # %bb.0:
-; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
-; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
-; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
 ; X64-NEXT:    retq
   %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
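The folded vmovaps constant follows from the demanded bits: the or with 65535 covers every bit the and left variable in the lanes masked with 255 and 4085, so those lanes are known to be exactly 65535; with the shuffle (not shown in the truncated diff) demanding only such lanes, the uitofp becomes the 6.5535E+4 splat. A scalar sketch of the reasoning, exhaustively checked in plain C++ (not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  // 255 and 4085 both fit in the low 16 bits, so or-ing with 65535
  // (0xFFFF) sets every bit the "and" could have left variable: the
  // lane value is a known constant regardless of the input.
  for (uint32_t x = 0; x <= 0xFFFF; ++x) {
    assert(((x & 255u) | 65535u) == 65535u);
    assert(((x & 4085u) | 65535u) == 65535u);
  }
}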

llvm/test/CodeGen/X86/known-signbits-vector.ll (+4 -9)

@@ -230,6 +230,7 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
   ret <4 x double> %3
 }
 
+; TODO: Fix vpshufd+vpsrlq -> vpshufd/vpermilps
 define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
 ; X32:       # %bb.0:
@@ -239,7 +240,8 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
 ;
 ; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
 ; X64:       # %bb.0:
-; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X64-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
@@ -255,20 +257,13 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
 ; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
 ; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
 ; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
 ; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
 ; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
-; X32-NEXT:    sarl $31, %eax
-; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
-; X32-NEXT:    vpsllq $20, %xmm1, %xmm1
-; X32-NEXT:    vpsrad $20, %xmm1, %xmm2
-; X32-NEXT:    vpsrlq $20, %xmm1, %xmm1
-; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X32-NEXT:    vpinsrd $0, {{[0-9]+}}(%esp), %xmm1, %xmm1
 ; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
 ; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
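One way to read the X32 hunk above: the deleted vpsllq/vpsrad/vpsrlq/vpblendw sequence was an emulated 64-bit sign-extend-in-register, and the only surviving use (vpand feeding a vmovd of the low dword) demands bits the extension cannot change, so the whole sequence is dead, along with the scalar sarl $31 / vpinsrd $1 that built the high half. A scalar sketch of that fact under the same shift amount (hypothetical values, not the test's exact IR):

#include <cassert>
#include <cstdint>

// sext_in_reg via shl 20 / ashr 20 on an i64, as in the removed
// psllq/psrad/psrlq pattern: it only rewrites bits [44,64) with copies
// of bit 43 and leaves bits [0,44) untouched.
int64_t signExtendInReg44(int64_t x) {
  return (int64_t)((uint64_t)x << 20) >> 20; // arithmetic shift assumed
}

int main() {
  for (int64_t x : {0LL, -1LL, 0x12345LL, 0x7FFFFFFFFLL}) {
    // A consumer demanding only the low 32 bits sees the same value
    // whether or not the extend ran, so the extend is dead code.
    assert((uint32_t)signExtendInReg44(x) == (uint32_t)x);
  }
}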

llvm/test/CodeGen/X86/reduce-trunc-shl.ll (+1 -5)

@@ -72,11 +72,7 @@ define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
 ;
 ; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpslld $17, %ymm0, %ymm0
-; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
   %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
   %conv = trunc <8 x i32> %shl to <8 x i16>
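The new all-zero result is correct by demanded bits, not a lost check: shifting an i32 left by 17 clears bits [0,17), and truncation to i16 keeps only bits [0,16), so every output element is provably zero and the whole computation folds to vxorps. A scalar check (plain C++, not from the commit):

#include <cassert>
#include <cstdint>

int main() {
  // trunc(i32 (x << 17)) to i16 keeps only bits the shift just cleared.
  for (uint32_t x : {0u, 1u, 0xFFFFu, 0x12345678u, 0xFFFFFFFFu})
    assert((uint16_t)(x << 17) == 0);
}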

llvm/test/CodeGen/X86/shrink_vmul.ll (+10 -40)

@@ -1154,10 +1154,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
 ; X86-SSE-NEXT:    pmuludq %xmm0, %xmm1
-; X86-SSE-NEXT:    pmuludq %xmm0, %xmm2
-; X86-SSE-NEXT:    psllq $32, %xmm2
-; X86-SSE-NEXT:    paddq %xmm1, %xmm2
-; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X86-SSE-NEXT:    movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
 ; X86-SSE-NEXT:    retl
@@ -1191,10 +1188,7 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
 ; X64-SSE-NEXT:    pmuludq %xmm0, %xmm1
-; X64-SSE-NEXT:    pmuludq %xmm0, %xmm2
-; X64-SSE-NEXT:    psllq $32, %xmm2
-; X64-SSE-NEXT:    paddq %xmm1, %xmm2
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X64-SSE-NEXT:    movq %xmm0, (%rax,%rdx,4)
 ; X64-SSE-NEXT:    retq
 ;
@@ -1952,15 +1946,7 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT:    pxor %xmm1, %xmm1
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = <0,u,65536,u>
-; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X86-SSE-NEXT:    psrlq $32, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm0, %xmm3
-; X86-SSE-NEXT:    paddq %xmm1, %xmm3
-; X86-SSE-NEXT:    psllq $32, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT:    paddq %xmm3, %xmm0
+; X86-SSE-NEXT:    pmuludq {{\.LCPI.*}}, %xmm0
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-SSE-NEXT:    movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT:    retl
@@ -1986,13 +1972,10 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; X64-SSE-NEXT:    movl $65536, %ecx # imm = 0x10000
-; X64-SSE-NEXT:    movq %rcx, %xmm2
-; X64-SSE-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; X64-SSE-NEXT:    pmuludq %xmm2, %xmm0
-; X64-SSE-NEXT:    pmuludq %xmm1, %xmm2
-; X64-SSE-NEXT:    psllq $32, %xmm2
-; X64-SSE-NEXT:    paddq %xmm0, %xmm2
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X64-SSE-NEXT:    movq %rcx, %xmm1
+; X64-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; X64-SSE-NEXT:    pmuludq %xmm0, %xmm1
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X64-SSE-NEXT:    movq %xmm0, (%rax,%rsi,4)
 ; X64-SSE-NEXT:    retq
 ;
@@ -2037,16 +2020,7 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
 ; X86-SSE-NEXT:    psrad $16, %xmm0
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = <0,u,32768,u>
-; X86-SSE-NEXT:    pxor %xmm2, %xmm2
-; X86-SSE-NEXT:    pmuludq %xmm1, %xmm2
-; X86-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X86-SSE-NEXT:    psrlq $32, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm0, %xmm3
-; X86-SSE-NEXT:    paddq %xmm2, %xmm3
-; X86-SSE-NEXT:    psllq $32, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0
-; X86-SSE-NEXT:    paddq %xmm3, %xmm0
+; X86-SSE-NEXT:    pmuludq {{\.LCPI.*}}, %xmm0
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-SSE-NEXT:    movq %xmm0, (%edx,%eax,4)
 ; X86-SSE-NEXT:    retl
@@ -2072,12 +2046,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
 ; X64-SSE-NEXT:    movl $32768, %ecx # imm = 0x8000
 ; X64-SSE-NEXT:    movq %rcx, %xmm1
 ; X64-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; X64-SSE-NEXT:    pmuludq %xmm1, %xmm0
-; X64-SSE-NEXT:    pxor %xmm2, %xmm2
-; X64-SSE-NEXT:    pmuludq %xmm1, %xmm2
-; X64-SSE-NEXT:    psllq $32, %xmm2
-; X64-SSE-NEXT:    paddq %xmm0, %xmm2
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X64-SSE-NEXT:    pmuludq %xmm0, %xmm1
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X64-SSE-NEXT:    movq %xmm0, (%rax,%rsi,4)
 ; X64-SSE-NEXT:    retq
 ;
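All of the shrink_vmul hunks are the same simplification: the surviving pshufd [0,2,2,3] + movq pair stores only the low 32 bits of each 64-bit pmuludq product, and those bits do not depend on the deleted high-half multiply/shift/add chain. The underlying identity, sketched in scalar C++ (the values are arbitrary examples):

#include <cassert>
#include <cstdint>

int main() {
  // The low 32 bits of a product depend only on the low 32 bits of the
  // operands: a*b mod 2^32 == (a mod 2^32)*(b mod 2^32) mod 2^32.
  uint64_t a = 0x123456789ABCDEF0ull, b = 0x0FEDCBA987654321ull;
  uint64_t full = a * b; // wraps mod 2^64, like a 64-bit vector lane
  uint64_t lowHalvesOnly = (a & 0xFFFFFFFFull) * (b & 0xFFFFFFFFull);
  assert((uint32_t)full == (uint32_t)lowHalvesOnly);
}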
