
Commit 0658a95
Committed Apr 19, 2017
[DAG] add splat vector support for 'or' in SimplifyDemandedBits
I've changed one of the tests to not fold away, but we didn't and still don't do the transform that the comment claims we do (and I don't know why we'd want to do that).

Follow-up to:
https://reviews.llvm.org/rL300725
https://reviews.llvm.org/rL300763

llvm-svn: 300772

1 parent: 4a3a870
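For background on what the commit enables (this note is not part of the commit message): SimplifyDemandedBits on an 'or' node only needs the other operand's bits where the constant operand has zeros, and with a splat vector constant the same lane mask applies to every element. A minimal scalar sketch of that rule, with a made-up helper name and no relation to the real TargetLowering::SimplifyDemandedBits signature:

#include <cstdint>
#include <cstdio>

// Hypothetical helper (not the LLVM API): which bits of X still matter in
// (or X, C)? Bits already set in C are 1 in the result no matter what X
// holds, so they are never demanded from X. For a splat vector constant,
// the same lane mask applies to every element.
static uint64_t demandedFromX(uint64_t DemandedResultBits, uint64_t SplatLaneC) {
  return DemandedResultBits & ~SplatLaneC;
}

int main() {
  // One lane of or_and_v2i64 after this commit: (x & 7) | 3.
  // Prints 0xfffffffffffffffc: bits 0 and 1 come from the 'or' constant 3,
  // so only bit 2 of the 'and' result is still live. That is why the test
  // mask was changed from 1 (fully covered, the 'and' would fold away) to 7.
  std::printf("%#llx\n", (unsigned long long)demandedFromX(~0ull, 3));
  return 0;
}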

3 files changed: 16 additions & 21 deletions
 

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return Load;
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
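With the !VT.isVector() guard gone, the same demanded-bits reasoning now runs on vector 'or' nodes, which is what folds or_zext_v2i32 in the test diff below to a constant. A scalar stand-in for one lane (illustration only, not LLVM code):

#include <cassert>
#include <cstdint>

// One v2i64 lane of or_zext_v2i32: (zext i32 x to i64) | 0xFFFFFFFF.
// The 'or' constant covers the low 32 bits, so only the high 32 bits are
// demanded from the zext, and a zext is known to zero exactly those bits.
// Every bit of the result is therefore known, and the node becomes the
// splat constant that the new CHECK line loads with movaps.
static uint64_t lane(uint32_t x) { return (uint64_t)x | 0xFFFFFFFFull; }

int main() {
  assert(lane(0) == 0xFFFFFFFFull);
  assert(lane(0xDEADBEEFu) == 0xFFFFFFFFull); // independent of x
  assert(lane(~0u) == 0xFFFFFFFFull);
  return 0;
}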

llvm/test/CodeGen/X86/combine-or.ll

Lines changed: 7 additions & 9 deletions
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %or
 }
 
+; TODO: Why would we do this?
 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
 
 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %1 = and <2 x i64> %a0, <i64 1, i64 1>
+  %1 = and <2 x i64> %a0, <i64 7, i64 7>
   %2 = or <2 x i64> %1, <i64 3, i64 3>
   ret <2 x i64> %2
 }
 
+; If all masked bits are going to be set, that's a constant fold.
+
 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: or_and_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
 ; CHECK-NEXT: retq
   %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 ; CHECK-LABEL: or_zext_v2i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 ; CHECK-NEXT: retq
   %1 = zext <2 x i32> %a0 to <2 x i64>
   %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
 ; CHECK-LABEL: or_zext_v4i16:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 ; CHECK-NEXT: retq
   %1 = zext <4 x i16> %a0 to <4 x i32>
   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
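The two comments added to this test file can be sanity-checked with plain integer arithmetic. A scalar stand-in for the splat lanes (illustration only, not part of the test file):

#include <cassert>
#include <cstdint>

int main() {
  // Exhaustive over 8-bit values; the identities are purely bitwise, so
  // they carry over to the i32/i64 splat lanes used in the tests.
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t c1 = 0; c1 < 256; ++c1)
      for (uint32_t c2 = 0; c2 < 256; ++c2) {
        // The transform named in the TODO comment is at least sound:
        // (or (and X, c1), c2) == (and (or X, c2), c1|c2).
        assert(((x & c1) | c2) == ((x | c2) & (c1 | c2)));
        // "If all masked bits are going to be set, that's a constant fold":
        // when every set bit of c1 is also set in c2, the result is just c2.
        // That is the or_and_v4i32 case (c1 = 1, c2 = 3).
        if ((c1 & ~c2) == 0)
          assert(((x & c1) | c2) == c2);
      }
  return 0;
}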

llvm/test/CodeGen/X86/i64-to-float.ll

Lines changed: 8 additions & 10 deletions
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT: pandn %xmm0, %xmm2
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
 ; X64-SSE-NEXT: por %xmm2, %xmm3
-; X64-SSE-NEXT: movdqa %xmm3, %xmm0
-; X64-SSE-NEXT: pxor %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: por %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm4
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pxor %xmm3, %xmm1
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm4, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
 ; X64-SSE-NEXT: pandn %xmm3, %xmm0
-; X64-SSE-NEXT: pand %xmm2, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax
 ; X64-SSE-NEXT: xorps %xmm0, %xmm0
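The X64-SSE churn above is the same constant-fold story: the old sequence built the comparison bound at run time by por-ing the [255,255] clamp splat into the sign-bias constant in xmm1, while the new one loads the pre-folded splat [2147483903,2147483903]. Assuming xmm1 holds the usual [2147483648,2147483648] sign-bias splat set up earlier in the function (outside this hunk), the folded lane value is just the OR of the two old constants, as this small check illustrates:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t SignBias = 2147483648u; // 0x80000000, assumed pxor splat lane
  const uint64_t ClampHi  = 255u;        // lane of the removed [255,255] splat
  assert((SignBias | ClampHi) == 2147483903u); // lane of the new constant
  return 0;
}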
