Skip to content

Commit 5a82a78

Browse files
committed Feb 10, 2019
[DAGCombine] Simplify funnel shifts with undef/zero args to bitshifts
Now that we have SimplifyDemandedBits support for funnel shifts (rL353539), we need to simplify funnel shifts back to bitshifts in cases where either argument has been folded to undef/zero.

Differential Revision: https://reviews.llvm.org/D58009

llvm-svn: 353645
1 parent 06a61b0 commit 5a82a78

File tree

2 files changed

+77
-42
lines changed

2 files changed

+77
-42
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+41-2
Original file line number | Diff line number | Diff line change
@@ -7127,13 +7127,52 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
71277127
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
71287128
return IsFSHL ? N0 : N1;
71297129

7130-
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7130+
auto IsUndefOrZero = [](SDValue V) {
7131+
if (V.isUndef())
7132+
return true;
7133+
if (ConstantSDNode *Cst = isConstOrConstSplat(V, /*AllowUndefs*/true))
7134+
return Cst->getAPIntValue() == 0;
7135+
return false;
7136+
};
7137+
71317138
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7139+
EVT ShAmtTy = N2.getValueType();
7140+
7141+
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
71327142
if (Cst->getAPIntValue().uge(BitWidth)) {
71337143
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
71347144
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7135-
DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
7145+
DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
71367146
}
7147+
7148+
unsigned ShAmt = Cst->getZExtValue();
7149+
if (ShAmt == 0)
7150+
return IsFSHL ? N0 : N1;
7151+
7152+
// fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7153+
// fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7154+
// fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7155+
// fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7156+
if (IsUndefOrZero(N0))
7157+
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7158+
DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7159+
SDLoc(N), ShAmtTy));
7160+
if (IsUndefOrZero(N1))
7161+
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7162+
DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7163+
SDLoc(N), ShAmtTy));
7164+
}
7165+
7166+
// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7167+
// fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7168+
// iff We know the shift amount is in range.
7169+
// TODO: when is it worth doing SUB(BW, N2) as well?
7170+
if (isPowerOf2_32(BitWidth)) {
7171+
APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7172+
if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7173+
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7174+
if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7175+
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
71377176
}
71387177

71397178
// fold (fshl N0, N0, N2) -> (rotl N0, N2)

‎llvm/test/CodeGen/X86/funnel-shift.ll

+36-40
Original file line number | Diff line number | Diff line change
@@ -404,12 +404,13 @@ define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
404404
; X32-SSE2-LABEL: fshl_i32_undef0_cst:
405405
; X32-SSE2: # %bb.0:
406406
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
407-
; X32-SSE2-NEXT: shldl $9, %eax, %eax
407+
; X32-SSE2-NEXT: shrl $23, %eax
408408
; X32-SSE2-NEXT: retl
409409
;
410410
; X64-AVX2-LABEL: fshl_i32_undef0_cst:
411411
; X64-AVX2: # %bb.0:
412-
; X64-AVX2-NEXT: shldl $9, %edi, %eax
412+
; X64-AVX2-NEXT: movl %edi, %eax
413+
; X64-AVX2-NEXT: shrl $23, %eax
413414
; X64-AVX2-NEXT: retq
414415
%res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
415416
ret i32 %res
@@ -438,19 +439,18 @@ define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
438439
; X32-SSE2-LABEL: fshl_i32_undef1_msk:
439440
; X32-SSE2: # %bb.0:
440441
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
441-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
442-
; X32-SSE2-NEXT: andl $7, %ecx
443-
; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
444-
; X32-SSE2-NEXT: shldl %cl, %eax, %eax
442+
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
443+
; X32-SSE2-NEXT: andb $7, %cl
444+
; X32-SSE2-NEXT: shll %cl, %eax
445445
; X32-SSE2-NEXT: retl
446446
;
447447
; X64-AVX2-LABEL: fshl_i32_undef1_msk:
448448
; X64-AVX2: # %bb.0:
449449
; X64-AVX2-NEXT: movl %esi, %ecx
450450
; X64-AVX2-NEXT: movl %edi, %eax
451-
; X64-AVX2-NEXT: andl $7, %ecx
451+
; X64-AVX2-NEXT: andb $7, %cl
452452
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
453-
; X64-AVX2-NEXT: shldl %cl, %eax, %eax
453+
; X64-AVX2-NEXT: shll %cl, %eax
454454
; X64-AVX2-NEXT: retq
455455
%m = and i32 %a1, 7
456456
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
@@ -461,13 +461,13 @@ define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
461461
; X32-SSE2-LABEL: fshl_i32_undef1_cst:
462462
; X32-SSE2: # %bb.0:
463463
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
464-
; X32-SSE2-NEXT: shldl $9, %eax, %eax
464+
; X32-SSE2-NEXT: shll $9, %eax
465465
; X32-SSE2-NEXT: retl
466466
;
467467
; X64-AVX2-LABEL: fshl_i32_undef1_cst:
468468
; X64-AVX2: # %bb.0:
469469
; X64-AVX2-NEXT: movl %edi, %eax
470-
; X64-AVX2-NEXT: shldl $9, %eax, %eax
470+
; X64-AVX2-NEXT: shll $9, %eax
471471
; X64-AVX2-NEXT: retq
472472
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
473473
ret i32 %res
@@ -513,19 +513,18 @@ define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
513513
; X32-SSE2-LABEL: fshr_i32_undef0_msk:
514514
; X32-SSE2: # %bb.0:
515515
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
516-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
517-
; X32-SSE2-NEXT: andl $7, %ecx
518-
; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
519-
; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
516+
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
517+
; X32-SSE2-NEXT: andb $7, %cl
518+
; X32-SSE2-NEXT: shrl %cl, %eax
520519
; X32-SSE2-NEXT: retl
521520
;
522521
; X64-AVX2-LABEL: fshr_i32_undef0_msk:
523522
; X64-AVX2: # %bb.0:
524523
; X64-AVX2-NEXT: movl %esi, %ecx
525524
; X64-AVX2-NEXT: movl %edi, %eax
526-
; X64-AVX2-NEXT: andl $7, %ecx
525+
; X64-AVX2-NEXT: andb $7, %cl
527526
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
528-
; X64-AVX2-NEXT: shrdl %cl, %eax, %eax
527+
; X64-AVX2-NEXT: shrl %cl, %eax
529528
; X64-AVX2-NEXT: retq
530529
%m = and i32 %a1, 7
531530
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
@@ -536,13 +535,13 @@ define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
536535
; X32-SSE2-LABEL: fshr_i32_undef0_cst:
537536
; X32-SSE2: # %bb.0:
538537
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
539-
; X32-SSE2-NEXT: shrdl $9, %eax, %eax
538+
; X32-SSE2-NEXT: shrl $9, %eax
540539
; X32-SSE2-NEXT: retl
541540
;
542541
; X64-AVX2-LABEL: fshr_i32_undef0_cst:
543542
; X64-AVX2: # %bb.0:
544543
; X64-AVX2-NEXT: movl %edi, %eax
545-
; X64-AVX2-NEXT: shrdl $9, %eax, %eax
544+
; X64-AVX2-NEXT: shrl $9, %eax
546545
; X64-AVX2-NEXT: retq
547546
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
548547
ret i32 %res
@@ -592,12 +591,13 @@ define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
592591
; X32-SSE2-LABEL: fshr_i32_undef1_cst:
593592
; X32-SSE2: # %bb.0:
594593
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
595-
; X32-SSE2-NEXT: shrdl $9, %eax, %eax
594+
; X32-SSE2-NEXT: shll $23, %eax
596595
; X32-SSE2-NEXT: retl
597596
;
598597
; X64-AVX2-LABEL: fshr_i32_undef1_cst:
599598
; X64-AVX2: # %bb.0:
600-
; X64-AVX2-NEXT: shrdl $9, %edi, %eax
599+
; X64-AVX2-NEXT: movl %edi, %eax
600+
; X64-AVX2-NEXT: shll $23, %eax
601601
; X64-AVX2-NEXT: retq
602602
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
603603
ret i32 %res
@@ -645,15 +645,14 @@ define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
645645
define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
646646
; X32-SSE2-LABEL: fshl_i32_zero0_cst:
647647
; X32-SSE2: # %bb.0:
648-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
649-
; X32-SSE2-NEXT: xorl %eax, %eax
650-
; X32-SSE2-NEXT: shldl $9, %ecx, %eax
648+
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
649+
; X32-SSE2-NEXT: shrl $23, %eax
651650
; X32-SSE2-NEXT: retl
652651
;
653652
; X64-AVX2-LABEL: fshl_i32_zero0_cst:
654653
; X64-AVX2: # %bb.0:
655-
; X64-AVX2-NEXT: xorl %eax, %eax
656-
; X64-AVX2-NEXT: shldl $9, %edi, %eax
654+
; X64-AVX2-NEXT: movl %edi, %eax
655+
; X64-AVX2-NEXT: shrl $23, %eax
657656
; X64-AVX2-NEXT: retq
658657
%res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
659658
ret i32 %res
@@ -683,15 +682,14 @@ define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
683682
define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
684683
; X32-SSE2-LABEL: fshl_i32_zero1_cst:
685684
; X32-SSE2: # %bb.0:
686-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
687-
; X32-SSE2-NEXT: xorl %eax, %eax
688-
; X32-SSE2-NEXT: shrdl $23, %ecx, %eax
685+
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
686+
; X32-SSE2-NEXT: shll $9, %eax
689687
; X32-SSE2-NEXT: retl
690688
;
691689
; X64-AVX2-LABEL: fshl_i32_zero1_cst:
692690
; X64-AVX2: # %bb.0:
693-
; X64-AVX2-NEXT: xorl %eax, %eax
694-
; X64-AVX2-NEXT: shrdl $23, %edi, %eax
691+
; X64-AVX2-NEXT: movl %edi, %eax
692+
; X64-AVX2-NEXT: shll $9, %eax
695693
; X64-AVX2-NEXT: retq
696694
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
697695
ret i32 %res
@@ -721,15 +719,14 @@ define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
721719
define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
722720
; X32-SSE2-LABEL: fshr_i32_zero0_cst:
723721
; X32-SSE2: # %bb.0:
724-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
725-
; X32-SSE2-NEXT: xorl %eax, %eax
726-
; X32-SSE2-NEXT: shldl $23, %ecx, %eax
722+
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
723+
; X32-SSE2-NEXT: shrl $9, %eax
727724
; X32-SSE2-NEXT: retl
728725
;
729726
; X64-AVX2-LABEL: fshr_i32_zero0_cst:
730727
; X64-AVX2: # %bb.0:
731-
; X64-AVX2-NEXT: xorl %eax, %eax
732-
; X64-AVX2-NEXT: shldl $23, %edi, %eax
728+
; X64-AVX2-NEXT: movl %edi, %eax
729+
; X64-AVX2-NEXT: shrl $9, %eax
733730
; X64-AVX2-NEXT: retq
734731
%res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
735732
ret i32 %res
@@ -758,15 +755,14 @@ define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
758755
define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
759756
; X32-SSE2-LABEL: fshr_i32_zero1_cst:
760757
; X32-SSE2: # %bb.0:
761-
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
762-
; X32-SSE2-NEXT: xorl %eax, %eax
763-
; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
758+
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
759+
; X32-SSE2-NEXT: shll $23, %eax
764760
; X32-SSE2-NEXT: retl
765761
;
766762
; X64-AVX2-LABEL: fshr_i32_zero1_cst:
767763
; X64-AVX2: # %bb.0:
768-
; X64-AVX2-NEXT: xorl %eax, %eax
769-
; X64-AVX2-NEXT: shrdl $9, %edi, %eax
764+
; X64-AVX2-NEXT: movl %edi, %eax
765+
; X64-AVX2-NEXT: shll $23, %eax
770766
; X64-AVX2-NEXT: retq
771767
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
772768
ret i32 %res

0 commit comments

Comments
 (0)
Please sign in to comment.