diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47278,6 +47278,80 @@
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
 
+// Recognize the std::bit_ceil pattern and drop the cmov.
+// (cmov 1 (shl 1 (neg (trunc (ctlz (X - 1))))) (uge X 2))
+// =>
+// (shl 1 (and (neg (trunc (ctlz (X - 1)))) 31))
+static SDValue combineCMovBitCeil(SDValue FalseOp, SDValue TrueOp,
+                                  X86::CondCode CC, SDValue Cond,
+                                  SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const X86Subtarget &Subtarget) {
+  auto IsConstant = [](SDValue V, uint64_t C) {
+    auto *VC = dyn_cast<ConstantSDNode>(V);
+    return VC && VC->getZExtValue() == C;
+  };
+
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  // Limit to i32 and i64.
+  EVT VT = FalseOp.getValueType();
+  if (VT == MVT::i32)
+    ; // OK
+  else if (VT == MVT::i64 && Subtarget.is64Bit())
+    ; // OK
+  else
+    return SDValue();
+
+  // Check the condition.
+  if (CC != X86::COND_AE || Cond.getOpcode() != X86ISD::SUB ||
+      !IsConstant(Cond.getOperand(1), 2) || !Cond.hasOneUse())
+    return SDValue();
+
+  // Check FalseOp.
+  if (!isOneConstant(FalseOp))
+    return SDValue();
+
+  // Check TrueOp.
+  if (TrueOp.getOpcode() != ISD::SHL || !isOneConstant(TrueOp.getOperand(0)) ||
+      !TrueOp.hasOneUse())
+    return SDValue();
+
+  SDValue ShiftCount = TrueOp.getOperand(1);
+  if (ShiftCount.getOpcode() != ISD::SUB || !ShiftCount.hasOneUse())
+    return SDValue();
+
+  unsigned Size = VT.getSizeInBits();
+  if (!IsConstant(ShiftCount.getOperand(0), Size) ||
+      ShiftCount.getOperand(1).getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue Trunc = ShiftCount.getOperand(1);
+  if (Trunc.getOperand(0).getOpcode() != ISD::CTLZ)
+    return SDValue();
+
+  SDValue CTLZ = Trunc.getOperand(0);
+  if (CTLZ.getOperand(0).getOpcode() != ISD::ADD)
+    return SDValue();
+
+  SDValue Add = CTLZ.getOperand(0);
+  if (Add.getOperand(0) != Cond.getOperand(0) ||
+      !isAllOnesConstant(Add.getOperand(1)))
+    return SDValue();
+
+  // Construct (shl 1 (and (neg (trunc ...)) 31)).
+  SDLoc DL(TrueOp);
+  EVT ShiftCountVT = ShiftCount.getValueType();
+  SDValue Neg = DAG.getNode(ISD::SUB, DL, ShiftCountVT,
+                            DAG.getConstant(0, DL, ShiftCountVT), Trunc);
+  SDValue MaskedShiftCount =
+      DAG.getNode(ISD::AND, DL, ShiftCountVT, Neg,
+                  DAG.getConstant(Size - 1, DL, ShiftCountVT));
+  return DAG.getNode(ISD::SHL, DL, VT, DAG.getConstant(1, DL, VT),
+                     MaskedShiftCount);
+}
+
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
@@ -47499,6 +47573,10 @@
     }
   }
 
+  if (SDValue R =
+          combineCMovBitCeil(FalseOp, TrueOp, CC, Cond, DAG, DCI, Subtarget))
+    return R;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/bit_ceil.ll b/llvm/test/CodeGen/X86/bit_ceil.ll
--- a/llvm/test/CodeGen/X86/bit_ceil.ll
+++ b/llvm/test/CodeGen/X86/bit_ceil.ll
@@ -6,14 +6,11 @@
 define i32 @bit_ceil_i32(i32 %x) {
 ; CHECK-LABEL: bit_ceil_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    lzcntl %eax, %eax
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    lzcntl %edi, %eax
 ; CHECK-NEXT:    negb %al
 ; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    shlxl %eax, %ecx, %eax
-; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    cmovbl %ecx, %eax
 ; CHECK-NEXT:    retq
   %dec = add i32 %x, -1
   %lz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
@@ -27,13 +24,11 @@
 define i64 @bit_ceil_i64(i64 %x) {
 ; CHECK-LABEL: bit_ceil_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq -1(%rdi), %rax
-; CHECK-NEXT:    lzcntq %rax, %rax
+; CHECK-NEXT:    decq %rdi
+; CHECK-NEXT:    lzcntq %rdi, %rax
 ; CHECK-NEXT:    negb %al
 ; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    shlxq %rax, %rcx, %rax
-; CHECK-NEXT:    cmpq $2, %rdi
-; CHECK-NEXT:    cmovbq %rcx, %rax
 ; CHECK-NEXT:    retq
   %dec = add i64 %x, -1
   %lz = tail call i64 @llvm.ctlz.i64(i64 %dec, i1 false)
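
For context (this note is not part of the patch): the combine is sound because SHLX on x86-64 masks its shift count to the operand width, so an explicit `& 31` (`& 63` for i64) reproduces exactly the value the cmov used to select when X < 2. Below is a minimal standalone C++ sketch of the before/after semantics for i32; the helper names (`Ctlz32`, `BitCeilBefore`, `BitCeilAfter`) are hypothetical and exist only for illustration.

// Hedged sketch, not part of the patch: models the DAG before and after
// the combine. Build with any GCC/Clang-compatible compiler.
#include <cassert>
#include <cstdint>

// ctlz with zero defined to 32, matching `llvm.ctlz.i32(%dec, i1 false)`.
static unsigned Ctlz32(uint32_t V) { return V == 0 ? 32 : __builtin_clz(V); }

// Before: 1 << (32 - ctlz(X - 1)), with a cmov selecting 1 when X < 2.
static uint32_t BitCeilBefore(uint32_t X) {
  return X < 2 ? 1 : 1u << (32 - Ctlz32(X - 1));
}

// After: no cmov. For X <= 1, X - 1 is 0 or ~0u, so ctlz yields 32 or 0,
// and (-ctlz) & 31 == 0 either way; 1 << 0 == 1 recovers the value the
// cmov used to pick. The explicit `& 31` is free in hardware because
// SHLX already masks its count to 5 bits.
static uint32_t BitCeilAfter(uint32_t X) {
  return 1u << ((0u - Ctlz32(X - 1)) & 31);
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u, 4u, 5u, 255u, 256u, 0x40000000u})
    assert(BitCeilBefore(X) == BitCeilAfter(X));
}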