Skip to content

Commit 07adb6a

Browse files
committed Apr 7, 2019
[X86][SSE] SimplifyDemandedBitsForTargetNode - Add initial PACKSS support
When we only want the sign bit (e.g. when using PACKSS truncation of comparison results for MOVMSK), we can just demand the sign bit of the source operands. This makes use of the fact that PACKSS saturates out-of-range values to the min/max int values, so the sign bit is always preserved.
Differential Revision: https://reviews.llvm.org/D60333
llvm-svn: 357859
1 parent 47a7662 commit 07adb6a

File tree

6 files changed

+67
-187
lines changed

6 files changed

+67
-187
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+19
Original file line number | Diff line number | Diff line change
@@ -33508,6 +33508,25 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
3350833508
}
3350933509
break;
3351033510
}
33511+
case X86ISD::PACKSS:
33512+
// PACKSS saturates to MIN/MAX integer values. So if we just want the
33513+
// sign bit then we can just ask for the source operands sign bit.
33514+
// TODO - add known bits handling.
33515+
if (OriginalDemandedBits.isSignMask()) {
33516+
APInt DemandedLHS, DemandedRHS;
33517+
getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS);
33518+
33519+
KnownBits KnownLHS, KnownRHS;
33520+
APInt SignMask = APInt::getSignMask(BitWidth * 2);
33521+
if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS,
33522+
KnownLHS, TLO, Depth + 1))
33523+
return true;
33524+
if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
33525+
KnownRHS, TLO, Depth + 1))
33526+
return true;
33527+
}
33528+
// TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
33529+
break;
3351133530
case X86ISD::PCMPGT:
3351233531
// icmp sgt(0, R) == ashr(R, BitWidth-1).
3351333532
// iff we only need the sign bit then we can use R directly.

‎llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll

-3
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,6 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
7474
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
7575
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7676
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
77-
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
7877
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
7978
; AVX1-NEXT: vpmovmskb %xmm0, %eax
8079
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -99,7 +98,6 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
9998
; AVX2-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
10099
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
101100
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
102-
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
103101
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
104102
; AVX2-NEXT: vpmovmskb %xmm0, %eax
105103
; AVX2-NEXT: # kill: def $al killed $al killed $eax
@@ -191,7 +189,6 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double>
191189
; AVX12-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
192190
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
193191
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
194-
; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0
195192
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
196193
; AVX12-NEXT: vpmovmskb %xmm0, %eax
197194
; AVX12-NEXT: # kill: def $al killed $al killed $eax

‎llvm/test/CodeGen/X86/bitcast-setcc-128.ll

+2-6
Original file line number | Diff line number | Diff line change
@@ -765,17 +765,13 @@ define void @bitcast_16i8_store(i16* %p, <16 x i8> %a0) {
765765
define void @bitcast_8i16_store(i8* %p, <8 x i16> %a0) {
766766
; SSE2-SSSE3-LABEL: bitcast_8i16_store:
767767
; SSE2-SSSE3: # %bb.0:
768-
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
769-
; SSE2-SSSE3-NEXT: pcmpgtw %xmm0, %xmm1
770-
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm1
771-
; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
768+
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
769+
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
772770
; SSE2-SSSE3-NEXT: movb %al, (%rdi)
773771
; SSE2-SSSE3-NEXT: retq
774772
;
775773
; AVX12-LABEL: bitcast_8i16_store:
776774
; AVX12: # %bb.0:
777-
; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
778-
; AVX12-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
779775
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
780776
; AVX12-NEXT: vpmovmskb %xmm0, %eax
781777
; AVX12-NEXT: movb %al, (%rdi)

‎llvm/test/CodeGen/X86/bitcast-setcc-256.ll

+5-16
Original file line number | Diff line number | Diff line change
@@ -359,21 +359,14 @@ define void @bitcast_32i8_store(i32* %p, <32 x i8> %a0) {
359359
define void @bitcast_16i16_store(i16* %p, <16 x i16> %a0) {
360360
; SSE2-SSSE3-LABEL: bitcast_16i16_store:
361361
; SSE2-SSSE3: # %bb.0:
362-
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm2
363-
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm3
364-
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm3
365-
; SSE2-SSSE3-NEXT: pcmpgtw %xmm0, %xmm2
366-
; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
367-
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
362+
; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
363+
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
368364
; SSE2-SSSE3-NEXT: movw %ax, (%rdi)
369365
; SSE2-SSSE3-NEXT: retq
370366
;
371367
; AVX1-LABEL: bitcast_16i16_store:
372368
; AVX1: # %bb.0:
373369
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
374-
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
375-
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
376-
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
377370
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
378371
; AVX1-NEXT: vpmovmskb %xmm0, %eax
379372
; AVX1-NEXT: movw %ax, (%rdi)
@@ -416,13 +409,9 @@ define void @bitcast_16i16_store(i16* %p, <16 x i16> %a0) {
416409
define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) {
417410
; SSE2-SSSE3-LABEL: bitcast_8i32_store:
418411
; SSE2-SSSE3: # %bb.0:
419-
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm2
420-
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm3
421-
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
422-
; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
423-
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
424-
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
425-
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
412+
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
413+
; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
414+
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
426415
; SSE2-SSSE3-NEXT: movb %al, (%rdi)
427416
; SSE2-SSSE3-NEXT: retq
428417
;

‎llvm/test/CodeGen/X86/bitcast-setcc-512.ll

+8-37
Original file line number | Diff line number | Diff line change
@@ -481,18 +481,10 @@ define void @bitcast_64i8_store(i64* %p, <64 x i8> %a0) {
481481
define void @bitcast_32i16_store(i32* %p, <32 x i16> %a0) {
482482
; SSE-LABEL: bitcast_32i16_store:
483483
; SSE: # %bb.0:
484-
; SSE-NEXT: pxor %xmm4, %xmm4
485-
; SSE-NEXT: pxor %xmm5, %xmm5
486-
; SSE-NEXT: pcmpgtw %xmm1, %xmm5
487-
; SSE-NEXT: pxor %xmm1, %xmm1
488-
; SSE-NEXT: pcmpgtw %xmm0, %xmm1
489-
; SSE-NEXT: packsswb %xmm5, %xmm1
490-
; SSE-NEXT: pmovmskb %xmm1, %eax
491-
; SSE-NEXT: pxor %xmm0, %xmm0
492-
; SSE-NEXT: pcmpgtw %xmm3, %xmm0
493-
; SSE-NEXT: pcmpgtw %xmm2, %xmm4
494-
; SSE-NEXT: packsswb %xmm0, %xmm4
495-
; SSE-NEXT: pmovmskb %xmm4, %ecx
484+
; SSE-NEXT: packsswb %xmm1, %xmm0
485+
; SSE-NEXT: pmovmskb %xmm0, %eax
486+
; SSE-NEXT: packsswb %xmm3, %xmm2
487+
; SSE-NEXT: pmovmskb %xmm2, %ecx
496488
; SSE-NEXT: shll $16, %ecx
497489
; SSE-NEXT: orl %eax, %ecx
498490
; SSE-NEXT: movl %ecx, (%rdi)
@@ -501,14 +493,9 @@ define void @bitcast_32i16_store(i32* %p, <32 x i16> %a0) {
501493
; AVX1-LABEL: bitcast_32i16_store:
502494
; AVX1: # %bb.0:
503495
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
504-
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
505-
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
506-
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
507496
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
508497
; AVX1-NEXT: vpmovmskb %xmm0, %eax
509498
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
510-
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
511-
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1
512499
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
513500
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
514501
; AVX1-NEXT: shll $16, %ecx
@@ -519,9 +506,6 @@ define void @bitcast_32i16_store(i32* %p, <32 x i16> %a0) {
519506
;
520507
; AVX2-LABEL: bitcast_32i16_store:
521508
; AVX2: # %bb.0:
522-
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
523-
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
524-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
525509
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
526510
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
527511
; AVX2-NEXT: vpmovmskb %ymm0, %eax
@@ -558,31 +542,18 @@ define void @bitcast_32i16_store(i32* %p, <32 x i16> %a0) {
558542
define void @bitcast_16i32_store(i16* %p, <16 x i32> %a0) {
559543
; SSE-LABEL: bitcast_16i32_store:
560544
; SSE: # %bb.0:
561-
; SSE-NEXT: pxor %xmm4, %xmm4
562-
; SSE-NEXT: pxor %xmm5, %xmm5
563-
; SSE-NEXT: pcmpgtd %xmm3, %xmm5
564-
; SSE-NEXT: pxor %xmm3, %xmm3
565-
; SSE-NEXT: pcmpgtd %xmm2, %xmm3
566-
; SSE-NEXT: packssdw %xmm5, %xmm3
567-
; SSE-NEXT: pxor %xmm2, %xmm2
568-
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
569-
; SSE-NEXT: pcmpgtd %xmm0, %xmm4
570-
; SSE-NEXT: packssdw %xmm2, %xmm4
571-
; SSE-NEXT: packsswb %xmm3, %xmm4
572-
; SSE-NEXT: pmovmskb %xmm4, %eax
545+
; SSE-NEXT: packssdw %xmm3, %xmm2
546+
; SSE-NEXT: packssdw %xmm1, %xmm0
547+
; SSE-NEXT: packsswb %xmm2, %xmm0
548+
; SSE-NEXT: pmovmskb %xmm0, %eax
573549
; SSE-NEXT: movw %ax, (%rdi)
574550
; SSE-NEXT: retq
575551
;
576552
; AVX1-LABEL: bitcast_16i32_store:
577553
; AVX1: # %bb.0:
578554
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
579-
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
580-
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
581-
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm3, %xmm1
582555
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
583556
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
584-
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
585-
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm3, %xmm0
586557
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
587558
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
588559
; AVX1-NEXT: vpmovmskb %xmm0, %eax

0 commit comments

Comments
 (0)