Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2582,7 +2582,7 @@
   return true;
 }
 
-// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
+// See if this is an (X >> C1) & C2 that we can match to BEXTRI.
 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   SDLoc dl(Node);
@@ -2590,7 +2590,12 @@
   SDValue N0 = Node->getOperand(0);
   SDValue N1 = Node->getOperand(1);
 
-  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
+  // Only do this for BEXTRI since it takes an immediate. We could put an
+  // immediate in a register for BMI, but that's the same number of instructions
+  // and on Intel CPUs BEXTR is 2 uops. So it would be a net increase in uops.
+  // Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+  // hoisting the move immediate would make it worthwhile?
+  if (!Subtarget->hasTBM())
     return false;
 
   // Must have a shift right.
Index: test/CodeGen/X86/bmi-x86_64.ll
===================================================================
--- test/CodeGen/X86/bmi-x86_64.ll
+++ test/CodeGen/X86/bmi-x86_64.ll
@@ -16,8 +16,9 @@
 define i64 @bextr64b(i64 %x)  uwtable  ssp {
 ; CHECK-LABEL: bextr64b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $3076, %eax # imm = 0xC04
-; CHECK-NEXT:    bextrl %eax, %edi, %eax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shrl $4, %eax
+; CHECK-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; CHECK-NEXT:    retq
   %1 = lshr i64 %x, 4
   %2 = and i64 %1, 4095
@@ -39,8 +40,9 @@
 define i64 @bextr64b_load(i64* %x) {
 ; CHECK-LABEL: bextr64b_load:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $3076, %eax # imm = 0xC04
-; CHECK-NEXT:    bextrl %eax, (%rdi), %eax
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    shrl $4, %eax
+; CHECK-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; CHECK-NEXT:    retq
   %1 = load i64, i64* %x, align 8
   %2 = lshr i64 %1, 4
@@ -61,11 +63,19 @@
 }
 
 define i64 @bextr64d(i64 %a) {
-; CHECK-LABEL: bextr64d:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $8450, %eax # imm = 0x2102
-; CHECK-NEXT:    bextrq %rax, %rdi, %rax
-; CHECK-NEXT:    retq
+; BMI1-LABEL: bextr64d:
+; BMI1:       # %bb.0: # %entry
+; BMI1-NEXT:    shrq $2, %rdi
+; BMI1-NEXT:    movl $8448, %eax # imm = 0x2100
+; BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; BMI1-NEXT:    retq
+;
+; BMI2-LABEL: bextr64d:
+; BMI2:       # %bb.0: # %entry
+; BMI2-NEXT:    shrq $2, %rdi
+; BMI2-NEXT:    movb $33, %al
+; BMI2-NEXT:    bzhiq %rax, %rdi, %rax
+; BMI2-NEXT:    retq
 entry:
   %shr = lshr i64 %a, 2
   %and = and i64 %shr, 8589934591
Index: test/CodeGen/X86/bmi.ll
===================================================================
--- test/CodeGen/X86/bmi.ll
+++ test/CodeGen/X86/bmi.ll
@@ -344,14 +344,16 @@
 define i32 @bextr32b(i32 %x)  uwtable  ssp {
 ; X86-LABEL: bextr32b:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl $3076, %eax # imm = 0xC04
-; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bextr32b:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $3076, %eax # imm = 0xC04
-; X64-NEXT:    bextrl %eax, %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; X64-NEXT:    retq
   %1 = lshr i32 %x, 4
   %2 = and i32 %1, 4095
@@ -379,14 +381,16 @@
 ; X86-LABEL: bextr32b_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl $3076, %ecx # imm = 0xC04
-; X86-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bextr32b_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $3076, %eax # imm = 0xC04
-; X64-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    shrl $4, %eax
+; X64-NEXT:    andl $4095, %eax # imm = 0xFFF
 ; X64-NEXT:    retq
   %1 = load i32, i32* %x
   %2 = lshr i32 %1, 4
Index: test/CodeGen/X86/extract-bits.ll
===================================================================
--- test/CodeGen/X86/extract-bits.ll
+++ test/CodeGen/X86/extract-bits.ll
@@ -5653,8 +5653,9 @@
 ;
 ; X86-BMI1NOTBM-LABEL: c0_i32:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    shrl $19, %eax
+; X86-BMI1NOTBM-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1TBM-LABEL: c0_i32:
@@ -5664,8 +5665,9 @@
 ;
 ; X86-BMI1NOTBMBMI2-LABEL: c0_i32:
 ; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBMBMI2-NEXT:    shrl $19, %eax
+; X86-BMI1NOTBMBMI2-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X86-BMI1NOTBMBMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c0_i32:
@@ -5677,8 +5679,9 @@
 ;
 ; X64-BMI1NOTBM-LABEL: c0_i32:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    movl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    shrl $19, %eax
+; X64-BMI1NOTBM-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1TBM-LABEL: c0_i32:
@@ -5688,8 +5691,9 @@
 ;
 ; X64-BMI1NOTBMBMI2-LABEL: c0_i32:
 ; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1NOTBMBMI2-NEXT:    movl %edi, %eax
+; X64-BMI1NOTBMBMI2-NEXT:    shrl $19, %eax
+; X64-BMI1NOTBMBMI2-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X64-BMI1NOTBMBMI2-NEXT:    retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 1023