Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2582,7 +2582,7 @@ return true; } -// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI. +// See if this is an (X >> C1) & C2 that we can match to BEXTRI. bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); SDLoc dl(Node); @@ -2590,7 +2590,12 @@ SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - if (!Subtarget->hasBMI() && !Subtarget->hasTBM()) + // Only do this for BEXTRI since it takes an immediate. We could put an + // immediate in a register for BMI, but that's the same number of instructions + // and on Intel CPUs BEXTR is 2 uops. So it would be a net increase in uops. + // Maybe load folding, greater than 32-bit masks, or a guarantee of LICM + // hoisting the move immediate would make it worthwhile? + if (!Subtarget->hasTBM()) return false; // Must have a shift right. 
Index: test/CodeGen/X86/bmi-x86_64.ll =================================================================== --- test/CodeGen/X86/bmi-x86_64.ll +++ test/CodeGen/X86/bmi-x86_64.ll @@ -16,8 +16,9 @@ define i64 @bextr64b(i64 %x) uwtable ssp { ; CHECK-LABEL: bextr64b: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, %edi, %eax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: andl $4095, %eax # imm = 0xFFF ; CHECK-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 @@ -39,8 +40,9 @@ define i64 @bextr64b_load(i64* %x) { ; CHECK-LABEL: bextr64b_load: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $3076, %eax # imm = 0xC04 -; CHECK-NEXT: bextrl %eax, (%rdi), %eax +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: andl $4095, %eax # imm = 0xFFF ; CHECK-NEXT: retq %1 = load i64, i64* %x, align 8 %2 = lshr i64 %1, 4 @@ -61,11 +63,19 @@ } define i64 @bextr64d(i64 %a) { -; CHECK-LABEL: bextr64d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $8450, %eax # imm = 0x2102 -; CHECK-NEXT: bextrq %rax, %rdi, %rax -; CHECK-NEXT: retq +; BMI1-LABEL: bextr64d: +; BMI1: # %bb.0: # %entry +; BMI1-NEXT: shrq $2, %rdi +; BMI1-NEXT: movl $8448, %eax # imm = 0x2100 +; BMI1-NEXT: bextrq %rax, %rdi, %rax +; BMI1-NEXT: retq +; +; BMI2-LABEL: bextr64d: +; BMI2: # %bb.0: # %entry +; BMI2-NEXT: shrq $2, %rdi +; BMI2-NEXT: movb $33, %al +; BMI2-NEXT: bzhiq %rax, %rdi, %rax +; BMI2-NEXT: retq entry: %shr = lshr i64 %a, 2 %and = and i64 %shr, 8589934591 Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -344,14 +344,16 @@ define i32 @bextr32b(i32 %x) uwtable ssp { ; X86-LABEL: bextr32b: ; X86: # %bb.0: -; X86-NEXT: movl $3076, %eax # imm = 0xC04 -; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $4095, %eax # imm = 0xFFF ; 
X86-NEXT: retl ; ; X64-LABEL: bextr32b: ; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-NEXT: retq %1 = lshr i32 %x, 4 %2 = and i32 %1, 4095 @@ -379,14 +381,16 @@ ; X86-LABEL: bextr32b_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $3076, %ecx # imm = 0xC04 -; X86-NEXT: bextrl %ecx, (%eax), %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $4095, %eax # imm = 0xFFF ; X86-NEXT: retl ; ; X64-LABEL: bextr32b_load: ; X64: # %bb.0: -; X64-NEXT: movl $3076, %eax # imm = 0xC04 -; X64-NEXT: bextrl %eax, (%rdi), %eax +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-NEXT: retq %1 = load i32, i32* %x %2 = lshr i32 %1, 4 Index: test/CodeGen/X86/extract-bits.ll =================================================================== --- test/CodeGen/X86/extract-bits.ll +++ test/CodeGen/X86/extract-bits.ll @@ -5653,8 +5653,9 @@ ; ; X86-BMI1NOTBM-LABEL: c0_i32: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: shrl $19, %eax +; X86-BMI1NOTBM-NEXT: andl $1023, %eax # imm = 0x3FF ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1TBM-LABEL: c0_i32: @@ -5664,8 +5665,9 @@ ; ; X86-BMI1NOTBMBMI2-LABEL: c0_i32: ; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBMBMI2-NEXT: shrl $19, %eax +; X86-BMI1NOTBMBMI2-NEXT: andl $1023, %eax # imm = 0x3FF ; X86-BMI1NOTBMBMI2-NEXT: retl ; ; X64-NOBMI-LABEL: c0_i32: @@ -5677,8 +5679,9 @@ ; ; X64-BMI1NOTBM-LABEL: c0_i32: ; X64-BMI1NOTBM: # %bb.0: -; 
X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBM-NEXT: movl %edi, %eax +; X64-BMI1NOTBM-NEXT: shrl $19, %eax +; X64-BMI1NOTBM-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-BMI1NOTBM-NEXT: retq ; ; X64-BMI1TBM-LABEL: c0_i32: @@ -5688,8 +5691,9 @@ ; ; X64-BMI1NOTBMBMI2-LABEL: c0_i32: ; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: movl %edi, %eax +; X64-BMI1NOTBMBMI2-NEXT: shrl $19, %eax +; X64-BMI1NOTBMBMI2-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-BMI1NOTBMBMI2-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023