Skip to content

Commit 488d28d

Browse files
committed Jun 6, 2018
[X86] Emit BZHI when mask is ~(-1 << nbits)
Summary: In D47428, I propose choosing `~(-1 << nbits)` as the canonical form of low-bit-mask formation. As can be seen from these tests, there is a reason for that: AArch64 currently handles `~(-1 << nbits)` well, but not the more traditional `(1 << nbits) - 1` (sic!), while the opposite is true for X86. It would be much better to canonicalize. This patch is completely monkey-typing; I don't really understand how this works :) I have based it on the `// x & (-1 >> (32 - y))` pattern. Also, when we only have `BMI`, I wonder if we could use `BEXTR` with `start=0`? Related links: https://bugs.llvm.org/show_bug.cgi?id=36419 https://bugs.llvm.org/show_bug.cgi?id=37603 https://bugs.llvm.org/show_bug.cgi?id=37610 https://rise4fun.com/Alive/idM Reviewers: craig.topper, spatel, RKSimon, javed.absar Reviewed By: craig.topper Subscribers: kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D47453 llvm-svn: 334125
1 parent cb56f7a commit 488d28d

File tree

2 files changed

+77
-113
lines changed

2 files changed

+77
-113
lines changed
 

‎llvm/lib/Target/X86/X86InstrInfo.td

+41-13
Original file line numberDiff line numberDiff line change
@@ -2448,21 +2448,49 @@ let Predicates = [HasBMI2, NoTBM] in {
24482448
}
24492449

24502450
let Predicates = [HasBMI2] in {
2451-
def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
2452-
(BZHI32rr GR32:$src,
2453-
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2454-
2455-
def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
2456-
(BZHI32rm addr:$src,
2457-
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2451+
multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
2452+
ValueType VT, Instruction DstInst,
2453+
Instruction DstMemInst> {
2454+
def : Pat<regpattern,
2455+
(DstInst RC:$src,
2456+
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2457+
def : Pat<mempattern,
2458+
(DstMemInst addr:$src,
2459+
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2460+
}
24582461

2459-
def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
2460-
(BZHI64rr GR64:$src,
2461-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2462+
multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
2463+
Instruction DstInst, X86MemOperand x86memop,
2464+
Instruction DstMemInst> {
2465+
// x & ((1 << y) - 1)
2466+
defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
2467+
(and (x86memop addr:$src),
2468+
(add (shl 1, GR8:$lz), -1)),
2469+
RC, VT, DstInst, DstMemInst>;
2470+
2471+
// x & ~(-1 << y)
2472+
defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
2473+
(and (x86memop addr:$src),
2474+
(xor (shl -1, GR8:$lz), -1)),
2475+
RC, VT, DstInst, DstMemInst>;
2476+
2477+
// x & (-1 >> (bitwidth - y))
2478+
defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
2479+
(and (x86memop addr:$src),
2480+
(srl -1, (sub bitwidth, GR8:$lz))),
2481+
RC, VT, DstInst, DstMemInst>;
2482+
2483+
// x << (bitwidth - y) >> (bitwidth - y)
2484+
defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
2485+
(sub bitwidth, GR8:$lz)),
2486+
(srl (shl (x86memop addr:$src),
2487+
(sub bitwidth, GR8:$lz)),
2488+
(sub bitwidth, GR8:$lz)),
2489+
RC, VT, DstInst, DstMemInst>;
2490+
}
24622491

2463-
def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
2464-
(BZHI64rm addr:$src,
2465-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
2492+
defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
2493+
defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
24662494

24672495
// x & (-1 >> (32 - y))
24682496
def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),

‎llvm/test/CodeGen/X86/extract-lowbits.ll

+36-100
Original file line numberDiff line numberDiff line change
@@ -537,9 +537,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
537537
; X86-BMI1BMI2-LABEL: bzhi32_b0:
538538
; X86-BMI1BMI2: # %bb.0:
539539
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
540-
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
541-
; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
542-
; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
540+
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
543541
; X86-BMI1BMI2-NEXT: retl
544542
;
545543
; X64-NOBMI-LABEL: bzhi32_b0:
@@ -553,9 +551,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
553551
;
554552
; X64-BMI1BMI2-LABEL: bzhi32_b0:
555553
; X64-BMI1BMI2: # %bb.0:
556-
; X64-BMI1BMI2-NEXT: movl $-1, %eax
557-
; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
558-
; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
554+
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
559555
; X64-BMI1BMI2-NEXT: retq
560556
%notmask = shl i32 -1, %numlowbits
561557
%mask = xor i32 %notmask, -1
@@ -576,9 +572,7 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
576572
; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
577573
; X86-BMI1BMI2: # %bb.0:
578574
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
579-
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
580-
; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
581-
; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
575+
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
582576
; X86-BMI1BMI2-NEXT: retl
583577
;
584578
; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
@@ -592,9 +586,7 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
592586
;
593587
; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
594588
; X64-BMI1BMI2: # %bb.0:
595-
; X64-BMI1BMI2-NEXT: movl $-1, %eax
596-
; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
597-
; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
589+
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
598590
; X64-BMI1BMI2-NEXT: retq
599591
%conv = zext i8 %numlowbits to i32
600592
%notmask = shl i32 -1, %conv
@@ -618,9 +610,7 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
618610
; X86-BMI1BMI2: # %bb.0:
619611
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
620612
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
621-
; X86-BMI1BMI2-NEXT: movl $-1, %edx
622-
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %ecx
623-
; X86-BMI1BMI2-NEXT: andnl (%eax), %ecx, %eax
613+
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
624614
; X86-BMI1BMI2-NEXT: retl
625615
;
626616
; X64-NOBMI-LABEL: bzhi32_b2_load:
@@ -634,9 +624,7 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
634624
;
635625
; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
636626
; X64-BMI1BMI2: # %bb.0:
637-
; X64-BMI1BMI2-NEXT: movl $-1, %eax
638-
; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
639-
; X64-BMI1BMI2-NEXT: andnl (%rdi), %eax, %eax
627+
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
640628
; X64-BMI1BMI2-NEXT: retq
641629
%val = load i32, i32* %w
642630
%notmask = shl i32 -1, %numlowbits
@@ -660,9 +648,7 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
660648
; X86-BMI1BMI2: # %bb.0:
661649
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
662650
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
663-
; X86-BMI1BMI2-NEXT: movl $-1, %edx
664-
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %ecx
665-
; X86-BMI1BMI2-NEXT: andnl (%eax), %ecx, %eax
651+
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
666652
; X86-BMI1BMI2-NEXT: retl
667653
;
668654
; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
@@ -676,9 +662,7 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
676662
;
677663
; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
678664
; X64-BMI1BMI2: # %bb.0:
679-
; X64-BMI1BMI2-NEXT: movl $-1, %eax
680-
; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
681-
; X64-BMI1BMI2-NEXT: andnl (%rdi), %eax, %eax
665+
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
682666
; X64-BMI1BMI2-NEXT: retq
683667
%val = load i32, i32* %w
684668
%conv = zext i8 %numlowbits to i32
@@ -701,9 +685,7 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
701685
; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
702686
; X86-BMI1BMI2: # %bb.0:
703687
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
704-
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
705-
; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
706-
; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
688+
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
707689
; X86-BMI1BMI2-NEXT: retl
708690
;
709691
; X64-NOBMI-LABEL: bzhi32_b4_commutative:
@@ -717,9 +699,7 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
717699
;
718700
; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
719701
; X64-BMI1BMI2: # %bb.0:
720-
; X64-BMI1BMI2-NEXT: movl $-1, %eax
721-
; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
722-
; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
702+
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
723703
; X64-BMI1BMI2-NEXT: retq
724704
%notmask = shl i32 -1, %numlowbits
725705
%mask = xor i32 %notmask, -1
@@ -778,9 +758,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
778758
;
779759
; X64-BMI1BMI2-LABEL: bzhi64_b0:
780760
; X64-BMI1BMI2: # %bb.0:
781-
; X64-BMI1BMI2-NEXT: movq $-1, %rax
782-
; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
783-
; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
761+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
784762
; X64-BMI1BMI2-NEXT: retq
785763
%notmask = shl i64 -1, %numlowbits
786764
%mask = xor i64 %notmask, -1
@@ -838,9 +816,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
838816
; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
839817
; X64-BMI1BMI2: # %bb.0:
840818
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
841-
; X64-BMI1BMI2-NEXT: movq $-1, %rax
842-
; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
843-
; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
819+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
844820
; X64-BMI1BMI2-NEXT: retq
845821
%conv = zext i8 %numlowbits to i64
846822
%notmask = shl i64 -1, %conv
@@ -902,9 +878,7 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
902878
;
903879
; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
904880
; X64-BMI1BMI2: # %bb.0:
905-
; X64-BMI1BMI2-NEXT: movq $-1, %rax
906-
; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
907-
; X64-BMI1BMI2-NEXT: andnq (%rdi), %rax, %rax
881+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
908882
; X64-BMI1BMI2-NEXT: retq
909883
%val = load i64, i64* %w
910884
%notmask = shl i64 -1, %numlowbits
@@ -967,9 +941,7 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
967941
; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
968942
; X64-BMI1BMI2: # %bb.0:
969943
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
970-
; X64-BMI1BMI2-NEXT: movq $-1, %rax
971-
; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
972-
; X64-BMI1BMI2-NEXT: andnq (%rdi), %rax, %rax
944+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
973945
; X64-BMI1BMI2-NEXT: retq
974946
%val = load i64, i64* %w
975947
%conv = zext i8 %numlowbits to i64
@@ -1028,9 +1000,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
10281000
;
10291001
; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
10301002
; X64-BMI1BMI2: # %bb.0:
1031-
; X64-BMI1BMI2-NEXT: movq $-1, %rax
1032-
; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
1033-
; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
1003+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
10341004
; X64-BMI1BMI2-NEXT: retq
10351005
%notmask = shl i64 -1, %numlowbits
10361006
%mask = xor i64 %notmask, -1
@@ -1091,11 +1061,8 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
10911061
;
10921062
; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
10931063
; X86-BMI1BMI2: # %bb.0:
1094-
; X86-BMI1BMI2-NEXT: movb $32, %al
1095-
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1096-
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
1097-
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1098-
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
1064+
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
1065+
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
10991066
; X86-BMI1BMI2-NEXT: retl
11001067
;
11011068
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
@@ -1109,11 +1076,7 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
11091076
;
11101077
; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
11111078
; X64-BMI1BMI2: # %bb.0:
1112-
; X64-BMI1BMI2-NEXT: movb $32, %al
1113-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1114-
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
1115-
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1116-
; X64-BMI1BMI2-NEXT: andl %edi, %eax
1079+
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
11171080
; X64-BMI1BMI2-NEXT: retq
11181081
%numhighbits = sub i8 32, %numlowbits
11191082
%sh_prom = zext i8 %numhighbits to i32
@@ -1175,12 +1138,9 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
11751138
;
11761139
; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
11771140
; X86-BMI1BMI2: # %bb.0:
1178-
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1179-
; X86-BMI1BMI2-NEXT: movb $32, %al
1180-
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1181-
; X86-BMI1BMI2-NEXT: movl $-1, %edx
1182-
; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %eax
1183-
; X86-BMI1BMI2-NEXT: andl (%ecx), %eax
1141+
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1142+
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
1143+
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
11841144
; X86-BMI1BMI2-NEXT: retl
11851145
;
11861146
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
@@ -1194,11 +1154,7 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
11941154
;
11951155
; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
11961156
; X64-BMI1BMI2: # %bb.0:
1197-
; X64-BMI1BMI2-NEXT: movb $32, %al
1198-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1199-
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
1200-
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1201-
; X64-BMI1BMI2-NEXT: andl (%rdi), %eax
1157+
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
12021158
; X64-BMI1BMI2-NEXT: retq
12031159
%val = load i32, i32* %w
12041160
%numhighbits = sub i8 32, %numlowbits
@@ -1350,11 +1306,8 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
13501306
;
13511307
; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
13521308
; X64-BMI1BMI2: # %bb.0:
1353-
; X64-BMI1BMI2-NEXT: movb $64, %al
1354-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1355-
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
1356-
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
1357-
; X64-BMI1BMI2-NEXT: andq %rdi, %rax
1309+
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
1310+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
13581311
; X64-BMI1BMI2-NEXT: retq
13591312
%numhighbits = sub i8 64, %numlowbits
13601313
%sh_prom = zext i8 %numhighbits to i64
@@ -1479,11 +1432,8 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
14791432
;
14801433
; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
14811434
; X64-BMI1BMI2: # %bb.0:
1482-
; X64-BMI1BMI2-NEXT: movb $64, %al
1483-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1484-
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
1485-
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
1486-
; X64-BMI1BMI2-NEXT: andq (%rdi), %rax
1435+
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
1436+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
14871437
; X64-BMI1BMI2-NEXT: retq
14881438
%val = load i64, i64* %w
14891439
%numhighbits = sub i8 64, %numlowbits
@@ -1602,10 +1552,8 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
16021552
;
16031553
; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
16041554
; X86-BMI1BMI2: # %bb.0:
1605-
; X86-BMI1BMI2-NEXT: movb $32, %al
1606-
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
1607-
; X86-BMI1BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
1608-
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1555+
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
1556+
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
16091557
; X86-BMI1BMI2-NEXT: retl
16101558
;
16111559
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
@@ -1619,10 +1567,7 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
16191567
;
16201568
; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
16211569
; X64-BMI1BMI2: # %bb.0:
1622-
; X64-BMI1BMI2-NEXT: movb $32, %al
1623-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1624-
; X64-BMI1BMI2-NEXT: shlxl %eax, %edi, %ecx
1625-
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1570+
; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
16261571
; X64-BMI1BMI2-NEXT: retq
16271572
%numhighbits = sub i8 32, %numlowbits
16281573
%sh_prom = zext i8 %numhighbits to i32
@@ -1685,10 +1630,8 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
16851630
; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
16861631
; X86-BMI1BMI2: # %bb.0:
16871632
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
1688-
; X86-BMI1BMI2-NEXT: movb $32, %cl
1689-
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
1690-
; X86-BMI1BMI2-NEXT: shlxl %ecx, (%eax), %eax
1691-
; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
1633+
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
1634+
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
16921635
; X86-BMI1BMI2-NEXT: retl
16931636
;
16941637
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
@@ -1702,10 +1645,7 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
17021645
;
17031646
; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
17041647
; X64-BMI1BMI2: # %bb.0:
1705-
; X64-BMI1BMI2-NEXT: movb $32, %al
1706-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1707-
; X64-BMI1BMI2-NEXT: shlxl %eax, (%rdi), %ecx
1708-
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
1648+
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
17091649
; X64-BMI1BMI2-NEXT: retq
17101650
%val = load i32, i32* %w
17111651
%numhighbits = sub i8 32, %numlowbits
@@ -1892,10 +1832,8 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
18921832
;
18931833
; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
18941834
; X64-BMI1BMI2: # %bb.0:
1895-
; X64-BMI1BMI2-NEXT: movb $64, %al
1896-
; X64-BMI1BMI2-NEXT: subb %sil, %al
1897-
; X64-BMI1BMI2-NEXT: shlxq %rax, %rdi, %rcx
1898-
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
1835+
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
1836+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
18991837
; X64-BMI1BMI2-NEXT: retq
19001838
%numhighbits = sub i8 64, %numlowbits
19011839
%sh_prom = zext i8 %numhighbits to i64
@@ -2084,10 +2022,8 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
20842022
;
20852023
; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
20862024
; X64-BMI1BMI2: # %bb.0:
2087-
; X64-BMI1BMI2-NEXT: movb $64, %al
2088-
; X64-BMI1BMI2-NEXT: subb %sil, %al
2089-
; X64-BMI1BMI2-NEXT: shlxq %rax, (%rdi), %rcx
2090-
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
2025+
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
2026+
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
20912027
; X64-BMI1BMI2-NEXT: retq
20922028
%val = load i64, i64* %w
20932029
%numhighbits = sub i8 64, %numlowbits

0 commit comments

Comments
 (0)