diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -836,7 +836,7 @@
     }
   }
 
-  Value *A;
+  Value *A, *B;
   Constant *C;
   if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
     unsigned AWidth = A->getType()->getScalarSizeInBits();
@@ -950,6 +950,25 @@
     }
   }
 
+  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
+  //
+  // The leading zeros contributed by the zext are a known constant
+  // C = SrcWidth - AWidth, so the count can be done in the narrow type and
+  // the constant added back afterwards.
+  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
+                                                       m_Value(B))))) {
+    unsigned AWidth = A->getType()->getScalarSizeInBits();
+    unsigned SrcWidth = Src->getType()->getScalarSizeInBits();
+    // The narrow type must be the trunc result type (otherwise the new
+    // ctlz/add would be ill-typed), and it must be wide enough to hold the
+    // largest possible result, SrcWidth (this excludes e.g. i1 sources).
+    if (AWidth == Trunc.getType()->getScalarSizeInBits() &&
+        AWidth > Log2_32(SrcWidth)) {
+      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
+      Value *NarrowCtlz =
+          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
+      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
+    }
+  }
 
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If there is a sequence of zext -> ctlz -> trunc, then the ctlz can be
+; performed in the lower datatype and the remaining bits can be added to the
+; ctlz result, thus removing the need of zext and trunc.
+
+declare i8 @llvm.ctlz.i8 (i8, i1);
+declare i64 @llvm.ctlz.i64 (i64, i1);
+declare i32 @llvm.ctlz.i32 (i32, i1);
+declare i16 @llvm.ctlz.i16 (i16, i1);
+
+define i16 @src0(i16 %x) {
+; CHECK-LABEL: @src0(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i16 [[TMP1]], 16
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %z = zext i16 %x to i32
+  %p = call i32 @llvm.ctlz.i32(i32 %z, i1 false)
+  %zz = trunc i32 %p to i16
+  ret i16 %zz
+}
+
+define i8 @src1(i8 %x) {
+; CHECK-LABEL: @src1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[X:%.*]], i1 false), !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP1]], 24
+; CHECK-NEXT:    ret i8 [[TMP2]]
+;
+  %z = zext i8 %x to i32
+  %p = call i32 @llvm.ctlz.i32(i32 %z, i1 false)
+  %zz = trunc i32 %p to i8
+  ret i8 %zz
+}
+
+define i8 @src2(i8 %x) {
+; CHECK-LABEL: @src2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[X:%.*]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP1]], 56
+; CHECK-NEXT:    ret i8 [[TMP2]]
+;
+  %z = zext i8 %x to i64
+  %p = call i64 @llvm.ctlz.i64(i64 %z, i1 false)
+  %zz = trunc i64 %p to i8
+  ret i8 %zz
+}
+
+define i32 @src3(i32 %x) {
+; CHECK-LABEL: @src3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG2:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 32
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %z = zext i32 %x to i64
+  %p = call i64 @llvm.ctlz.i64(i64 %z, i1 false)
+  %zz = trunc i64 %p to i32
+  ret i32 %zz
+}
+
+define i16 @src4(i16 %x) {
+; CHECK-LABEL: @src4(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i16 [[TMP1]], 48
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %z = zext i16 %x to i64
+  %p = call i64 @llvm.ctlz.i64(i64 %z, i1 false)
+  %zz = trunc i64 %p to i16
+  ret i16 %zz
+}
+
+; Negative test - the trunc result type (i16) does not match the zext source
+; type (i8), so the ctlz cannot simply be performed in the narrow type.
+define i16 @src5(i8 %x) {
+; CHECK-LABEL: @src5(
+; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[P:%.*]] = call i32 @llvm.ctlz.i32(i32 [[Z]], i1 false), !range [[RNG3:![0-9]+]]
+; CHECK-NEXT:    [[ZZ:%.*]] = trunc i32 [[P]] to i16
+; CHECK-NEXT:    ret i16 [[ZZ]]
+;
+  %z = zext i8 %x to i32
+  %p = call i32 @llvm.ctlz.i32(i32 %z, i1 false)
+  %zz = trunc i32 %p to i16
+  ret i16 %zz
+}