diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3504,9 +3504,11 @@ // b) x & ~(-1 << nbits) // c) x & (-1 >> (32 - y)) // d) x << (32 - y) >> (32 - y) +// e) (1 << nbits) - 1 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { assert( - (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && + (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND || + Node->getOpcode() == ISD::SRL) && "Should be either an and-mask, or right-shift after clearing high bits."); // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. @@ -3692,6 +3694,8 @@ if (!matchLowBitMask(Mask)) return false; } + } else if (matchLowBitMask(SDValue(Node, 0))) { + X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT); } else if (!matchPatternD(Node)) return false; @@ -5067,6 +5071,9 @@ [[fallthrough]]; case ISD::ADD: + if (Opcode == ISD::ADD && matchBitExtract(Node)) + return; + [[fallthrough]]; case ISD::SUB: { // Try to avoid folding immediates with multiple uses for optsize. // This code tries to select to register form directly to avoid going diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -4068,10 +4068,8 @@ ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax @@ -4131,10 +4129,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax ; X64-BMI2-NEXT: addq $8, %rsp @@ -4202,10 +4198,8 @@ ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax @@ -4265,10 +4259,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax ; X64-BMI2-NEXT: addq $8, %rsp @@ -4341,10 +4333,8 @@ ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax @@ -4404,10 +4394,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax ; X64-BMI2-NEXT: addq $8, %rsp @@ -4479,10 +4467,8 @@ ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax @@ -4542,10 +4528,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax ; X64-BMI2-NEXT: addq $8, %rsp @@ -4616,10 +4600,8 @@ ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax @@ -4679,10 +4661,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax ; X64-BMI2-NEXT: addq $8, %rsp @@ -4761,10 +4741,8 @@ ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl %ebx, %eax -; X86-BMI2-NEXT: negb %al -; X86-BMI2-NEXT: movl $-1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: bzhil %ebx, %eax, %eax ; X86-BMI2-NEXT: movl %eax, (%esp) ; X86-BMI2-NEXT: calll use32@PLT ; X86-BMI2-NEXT: bzhil %ebx, %esi, %esi @@ -4833,10 +4811,8 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: movl %esi, %ebp ; X64-BMI2-NEXT: shrxl %esi, %edi, %r14d -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %edi ; X64-BMI2-NEXT: callq use32@PLT ; X64-BMI2-NEXT: bzhil %ebx, %r14d, %ebx ; X64-BMI2-NEXT: movl %ebp, %edi @@ -5045,10 +5021,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax ; X64-BMI2-NEXT: addq $8, %rsp @@ -5252,10 +5226,8 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rbx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax ; X64-BMI2-NEXT: addq $8, %rsp @@ -5463,10 +5435,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax ; X64-BMI2-NEXT: addq $8, %rsp @@ -5674,10 +5644,8 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rbx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax ; X64-BMI2-NEXT: addq $8, %rsp @@ -5883,10 +5851,8 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax ; X64-BMI2-NEXT: addq $8, %rsp @@ -6109,10 +6075,8 @@ ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: movq %rsi, %r14 ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r15 -; X64-BMI2-NEXT: movl %ebx, %eax -; X64-BMI2-NEXT: negb %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rdi ; X64-BMI2-NEXT: callq use64@PLT ; X64-BMI2-NEXT: bzhiq %rbx, %r15, %rbx ; X64-BMI2-NEXT: movq %r14, %rdi diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll --- a/llvm/test/CodeGen/X86/extract-lowbits.ll +++ b/llvm/test/CodeGen/X86/extract-lowbits.ll @@ -2117,16 +2117,12 @@ ; ; X86-BMI2-LABEL: bzhi32_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X86-BMI2-NEXT: negb %dl -; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %edx, %esi, %edx -; X86-BMI2-NEXT: movl %edx, (%ecx) -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx +; X86-BMI2-NEXT: movl %edx, (%eax) +; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c0: @@ -2153,12 +2149,10 @@ ; ; X64-BMI2-LABEL: bzhi32_c0: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %esi, %eax, %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) ; X64-BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx -; X64-BMI2-NEXT: movl %ecx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -2194,16 +2188,12 @@ ; ; X86-BMI2-LABEL: bzhi32_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X86-BMI2-NEXT: negb %dl -; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %edx, %esi, %edx -; X86-BMI2-NEXT: movl %edx, (%ecx) -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx +; X86-BMI2-NEXT: movl %edx, (%eax) +; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: @@ -2230,12 +2220,10 @@ ; ; X64-BMI2-LABEL: bzhi32_c1_indexzext: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %esi, %eax, %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) ; X64-BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx -; X64-BMI2-NEXT: movl %ecx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -2284,12 +2272,10 @@ ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax -; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X86-BMI2-NEXT: negb %dl ; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %edx, %esi, %edx -; X86-BMI2-NEXT: movl %edx, (%ecx) +; X86-BMI2-NEXT: bzhil %edx, %esi, %esi +; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax +; X86-BMI2-NEXT: movl %esi, (%ecx) ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; @@ -2319,11 +2305,9 @@ ; ; X64-BMI2-LABEL: bzhi32_c2_load: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %esi, %eax, %ecx ; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx ; X64-BMI2-NEXT: movl %ecx, (%rdx) ; X64-BMI2-NEXT: retq %val = load i32, ptr %w @@ -2373,12 +2357,10 @@ ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax -; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X86-BMI2-NEXT: negb %dl ; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %edx, %esi, %edx -; X86-BMI2-NEXT: movl %edx, (%ecx) +; X86-BMI2-NEXT: bzhil %edx, %esi, %esi +; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax +; X86-BMI2-NEXT: movl %esi, (%ecx) ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; @@ -2408,11 +2390,9 @@ ; ; X64-BMI2-LABEL: bzhi32_c3_load_indexzext: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %esi, %eax, %ecx ; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx ; X64-BMI2-NEXT: movl %ecx, (%rdx) ; X64-BMI2-NEXT: retq %val = load i32, ptr %w @@ -2451,16 +2431,12 @@ ; ; X86-BMI2-LABEL: bzhi32_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X86-BMI2-NEXT: negb %dl -; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %edx, %esi, %edx -; X86-BMI2-NEXT: movl %edx, (%ecx) -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx +; X86-BMI2-NEXT: movl %edx, (%eax) +; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: @@ -2487,12 +2463,10 @@ ; ; X64-BMI2-LABEL: bzhi32_c4_commutative: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: bzhil %esi, %eax, %eax +; X64-BMI2-NEXT: movl %eax, (%rdx) ; X64-BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx -; X64-BMI2-NEXT: movl %ecx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -2593,12 +2567,10 @@ ; ; X64-BMI2-LABEL: bzhi64_c0: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) ; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx -; X64-BMI2-NEXT: movq %rcx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -2698,12 +2670,10 @@ ; X64-BMI2-LABEL: bzhi64_c1_indexzext: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) ; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx -; X64-BMI2-NEXT: movq %rcx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -2824,11 +2794,9 @@ ; ; X64-BMI2-LABEL: bzhi64_c2_load: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rcx ; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx ; X64-BMI2-NEXT: movq %rcx, (%rdx) ; X64-BMI2-NEXT: retq %val = load i64, ptr %w @@ -2951,11 +2919,9 @@ ; X64-BMI2-LABEL: bzhi64_c3_load_indexzext: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rcx ; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx ; X64-BMI2-NEXT: movq %rcx, (%rdx) ; X64-BMI2-NEXT: retq %val = load i64, ptr %w @@ -3057,12 +3023,10 @@ ; ; X64-BMI2-LABEL: bzhi64_c4_commutative: ; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax +; X64-BMI2-NEXT: movq %rax, (%rdx) ; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI2-NEXT: negb %sil -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx -; X64-BMI2-NEXT: movq %rcx, (%rdx) ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits