diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1556,25 +1556,22 @@
   // For some CPUs result of CTLZ(X) intrinsic is undefined
   // when X is 0. If we can not guarantee X != 0, we need to check this
   // when expand.
-  bool ZeroCheck = false;
+  bool FoundZeroCheck = false;
   // It is safe to assume Preheader exist as it was checked in
   // parent function RunOnLoop.
   BasicBlock *PH = CurLoop->getLoopPreheader();
 
-  // If we are using the count instruction outside the loop, make sure we
-  // have a zero check as a precondition. Without the check the loop would run
-  // one iteration for before any check of the input value. This means 0 and 1
-  // would have identical behavior in the original loop and thus
+  // If we are using the count instruction outside the loop, see if we have a
+  // zero check as a precondition. Without the check the loop would run one
+  // iteration before any check of the input value. This means an input of 0
+  // or 1 would produce the same count result. If were aren't able to find
+  // the zero check, we'll need to insert our own to select a different result
+  // for an input of 0.
   if (!IsCntPhiUsedOutsideLoop) {
-    auto *PreCondBB = PH->getSinglePredecessor();
-    if (!PreCondBB)
-      return false;
-    auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
-    if (!PreCondBI)
-      return false;
-    if (matchCondition(PreCondBI, PH) != InitX)
-      return false;
-    ZeroCheck = true;
+    if (auto *PreCondBB = PH->getSinglePredecessor())
+      if (auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator()))
+        if (matchCondition(PreCondBI, PH) == InitX)
+          FoundZeroCheck = true;
   }
 
   // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
@@ -1588,9 +1585,8 @@
   //  %inc = add nsw %i.0, 1
   //  br i1 %tobool
 
-  const Value *Args[] = {
-      InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
-                       : ConstantInt::getFalse(InitX->getContext())};
+  const Value *Args[] = {InitX, ConstantInt::getBool(InitX->getContext(),
+                                                     !IsCntPhiUsedOutsideLoop)};
 
   // @llvm.dbg doesn't count as they have no semantic effect.
   auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
@@ -1605,7 +1601,7 @@
     return false;
 
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
-                           DefX->getDebugLoc(), ZeroCheck,
+                           DefX->getDebugLoc(), FoundZeroCheck,
                            IsCntPhiUsedOutsideLoop);
   return true;
 }
@@ -1674,9 +1670,9 @@
 }
 
 static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
-                                    const DebugLoc &DL, bool ZeroCheck,
+                                    const DebugLoc &DL, bool ZeroUndef,
                                     Intrinsic::ID IID) {
-  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
+  Value *Ops[] = {Val, ZeroUndef ? IRBuilder.getTrue() : IRBuilder.getFalse()};
   Type *Tys[] = {Val->getType()};
 
   Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
@@ -1721,7 +1717,7 @@
 void LoopIdiomRecognize::transformLoopToCountable(
     Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
     PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
-    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+    bool FoundZeroCheck, bool IsCntPhiUsedOutsideLoop) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
   // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1748,7 +1744,10 @@
       llvm_unreachable("Unexpected opcode!");
   } else
     InitXNext = InitX;
-  Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
+  // If the count phi is not used outside the loop, we should use the zero
+  // undef form. If we didn't find a zero check we'll insert our own later.
+  Value *FFS = createFFSIntrinsic(Builder, InitXNext, DL,
+                                  !IsCntPhiUsedOutsideLoop, IntrinID);
   Value *Count = Builder.CreateSub(
       ConstantInt::get(FFS->getType(), FFS->getType()->getIntegerBitWidth()),
       FFS);
@@ -1774,6 +1773,21 @@
     NewCount = Builder.CreateSub(CntInitVal, NewCount);
   }
 
+  // If the count instruction is used outside the loop and we didn't find a zero
+  // check, we need to insert one. It's possible the zero check exists, but was
+  // missed by our simple check. Later passes may be able to optimize out this
+  // new check.
+  if (!IsCntPhiUsedOutsideLoop && !FoundZeroCheck) {
+    // If we didn't find the zero check, we need to assume we'll execute one
+    // iteration of the loop when the input is zero. If the input is zero, we
+    // need to select the initial value incremented/decremented by the count
+    // instruction's immediate operand.
+    Value *Cmp = Builder.CreateICmpEQ(
+        InitXNext, ConstantInt::get(InitXNext->getType(), 0));
+    Value *AltCount = Builder.CreateAdd(CntInitVal, CntInst->getOperand(1));
+    NewCount = Builder.CreateSelect(Cmp, AltCount, NewCount);
+  }
+
   // Step 2: Insert new IV and loop condition:
   // loop:
   //   ...
diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
--- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
@@ -256,6 +256,86 @@
 ; Recognize CTLZ builtin pattern.
 ; Here it will replace the loop -
 ; assume builtin is always profitable.
+; The C code here represents how other loop transformations and instcombine
+; may optimize the earlier ctlz_zero_check test case. The precondition check
+; for 0 is using the input to the absolute value pattern instead of the output.
+; This makes it different than the Value* passed to the loop phi. So we won't
+; be able to tell the input was checked for 0 so our replacement will need to
+; account for this.
+;
+; int ctlz_zero_check_elided_abs(int n)
+; {
+;   int abs_n = n >= 0 ? n : -n;
+;   if (!n)
+;     return 0;
+;   int i = 0;
+;   do {
+;     abs_n >>= 1;
+;     i++;
+;   } while (abs_n);
+;   return i;
+; }
+;
+define i32 @ctlz_zero_check_elided_abs(i32 %n) {
+; ALL-LABEL: @ctlz_zero_check_elided_abs(
+; ALL-NEXT:  entry:
+; ALL-NEXT:    [[C:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; ALL-NEXT:    [[NEGN:%.*]] = sub nsw i32 0, [[N]]
+; ALL-NEXT:    [[ABS_N:%.*]] = select i1 [[C]], i32 [[N]], i32 [[NEGN]]
+; ALL-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[N]], 0
+; ALL-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; ALL:       while.body.preheader:
+; ALL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
+; ALL-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[ABS_N]], 0
+; ALL-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
+; ALL-NEXT:    br label [[WHILE_BODY:%.*]]
+; ALL:       while.body:
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
+; ALL-NEXT:    [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; ALL-NEXT:    [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[ABS_N]], [[WHILE_BODY_PREHEADER]] ]
+; ALL-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_05]], 1
+; ALL-NEXT:    [[INC]] = add nsw i32 [[I_06]], 1
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; ALL:       while.end.loopexit:
+; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_BODY]] ]
+; ALL-NEXT:    br label [[WHILE_END]]
+; ALL:       while.end:
+; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; ALL-NEXT:    ret i32 [[I_0_LCSSA]]
+;
+entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %tobool4 = icmp eq i32 %n, 0
+  br i1 %tobool4, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
+  %shr = lshr i32 %n.addr.05, 1
+  %inc = add nsw i32 %i.06, 1
+  %tobool = icmp eq i32 %shr, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
 ;
 ; int ctlz(int n)
 ; {
@@ -630,10 +710,11 @@
   ret i32 %cnt.0.lcssa
 }
 
-; We can't easily transform this loop. It returns 1 for an input of both
-; 0 and 1.
+; This loop has no check for n being 0 before entering the loop so we need to
+; make one in order to use ctlz since an input of 0 or 1 will produce the same
+; count.
 ;
-; int ctlz_bad(unsigned n)
+; int ctlz_no_zero_check(unsigned n)
 ; {
 ;   int i = 0;
 ;   do {
@@ -643,19 +724,25 @@
 ;   return i;
 ; }
 ;
-define i32 @ctlz_bad(i32 %n) {
-; ALL-LABEL: @ctlz_bad(
+define i32 @ctlz_no_zero_check(i32 %n) {
+; ALL-LABEL: @ctlz_no_zero_check(
 ; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N:%.*]], i1 true)
+; ALL-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[N]], 0
+; ALL-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
 ; ALL-NEXT:    br label [[WHILE_COND:%.*]]
 ; ALL:       while.cond:
-; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
 ; ALL-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
 ; ALL:       while.end:
-; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND]] ]
 ; ALL-NEXT:    ret i32 [[INC_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll
@@ -6,7 +6,6 @@
 
 ; This test ensures we are able to optimize the following loop to an llvm.abs
 ; followed by an llvm.ctlz.
-; FIXME: LoopIdiom recongize is not forming llvm.ctlz.
 
 ; int ctlz_zero_check(int n)
 ; {
@@ -19,23 +18,15 @@
 ;   return i;
 ; }
 
+; FIXME: The icmp and select in the output are unnecessary.
 define i32 @ctlz_loop_with_abs(i32 %n) {
 ; CHECK-LABEL: @ctlz_loop_with_abs(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL_NOT1:%.*]] = icmp eq i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[N_ADDR_03:%.*]] = phi i32 [ [[TMP1:%.*]], [[WHILE_BODY]] ], [ [[TMP0]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP1]] = lshr i32 [[N_ADDR_03]], 1
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_02]], 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nuw nsw i32 32, [[TMP1]]
+; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = select i1 [[TOBOOL_NOT1]], i32 0, i32 [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
 ;
 entry: