diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2677,7 +2677,9 @@
     [[fallthrough]];
   }
   default:
-    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+    // We also ask the target about intrinsics (which could be specific to it).
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                             Known, TLO, Depth))
         return true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -92,6 +92,7 @@
 #include <cstdlib>
 #include <iterator>
 #include <limits>
+#include <optional>
 #include <tuple>
 #include <utility>
 #include <vector>
@@ -15006,17 +15007,20 @@
   return CSNeg;
 }
 
-static bool IsSVECntIntrinsic(SDValue S) {
+static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
   switch(getIntrinsicID(S.getNode())) {
   default:
     break;
   case Intrinsic::aarch64_sve_cntb:
+    return 8;
   case Intrinsic::aarch64_sve_cnth:
+    return 16;
   case Intrinsic::aarch64_sve_cntw:
+    return 32;
   case Intrinsic::aarch64_sve_cntd:
-    return true;
+    return 64;
   }
-  return false;
+  return {};
 }
 
 /// Calculates what the pre-extend type is, based on the extension
@@ -23296,6 +23300,24 @@
     // used - simplify to just Val.
     return TLO.CombineTo(Op, ShiftR->getOperand(0));
   }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    if (auto ElementSize = IsSVECntIntrinsic(Op)) {
+      unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
+      if (!MaxSVEVectorSizeInBits)
+        MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;
+      unsigned MaxElements = MaxSVEVectorSizeInBits / *ElementSize;
+      // The SVE count intrinsics don't support the multiplier immediate so we
+      // don't have to account for that here. The value returned may be slightly
+      // over the true required bits, as this is based on the "ALL" pattern. The
+      // other patterns are also exposed by these intrinsics, but they all
+      // return a value that's strictly less than "ALL".
+      unsigned RequiredBits = Log2_32(MaxElements) + 1;
+      unsigned BitWidth = Known.Zero.getBitWidth();
+      if (RequiredBits < BitWidth)
+        Known.Zero.setHighBits(BitWidth - RequiredBits);
+      return false;
+    }
+  }
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
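For reference, the known-bits arithmetic in the new ISD::INTRINSIC_WO_CHAIN case can be worked through by hand. The standalone C++20 sketch below (not part of the patch) hard-codes 2048 as the architectural maximum SVE vector width that AArch64::SVEMaxBitsPerVector supplies when no explicit width is configured, and uses std::bit_width, which equals Log2_32(x) + 1 for x > 0:

```cpp
#include <bit>
#include <cstdio>

int main() {
  // Assumed ceiling: no explicit SVE vector width is set, so the
  // architectural maximum of 2048 bits (AArch64::SVEMaxBitsPerVector)
  // applies.
  const unsigned MaxSVEVectorSizeInBits = 2048;
  const struct {
    const char *Name;
    unsigned ElementSizeInBits; // cntb counts bytes, cnth halfwords, ...
  } Cnt[] = {{"cntb", 8}, {"cnth", 16}, {"cntw", 32}, {"cntd", 64}};

  for (const auto &C : Cnt) {
    unsigned MaxElements = MaxSVEVectorSizeInBits / C.ElementSizeInBits;
    // std::bit_width(MaxElements) == Log2_32(MaxElements) + 1: the bits
    // needed to hold the inclusive count returned for the "ALL" pattern.
    unsigned RequiredBits = std::bit_width(MaxElements);
    unsigned long long LowMask = (1ULL << RequiredBits) - 1;
    printf("%s: MaxElements=%u RequiredBits=%u low-bits mask=0x%llx\n",
           C.Name, MaxElements, RequiredBits, LowMask);
  }
  return 0;
}
```

This prints masks 0x1ff, 0xff, 0x7f, and 0x3f for cntb, cnth, cntw, and cntd respectively. For cntb, MaxElements = 2048 / 8 = 256 and RequiredBits = 9, so bits 9 and up are known zero; that is exactly why the `and i64 %cntb, 511` (0x1ff) in the cntb_and_elimination test below folds away.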
diff --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -29,9 +29,8 @@
 ; CHECK-LABEL: cntb_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntb x8
-; CHECK-NEXT:    and x9, x8, #0x1ff
-; CHECK-NEXT:    and x8, x8, #0x3fffffffc
-; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    and x9, x8, #0x1fc
+; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %cntb = call i64 @llvm.aarch64.sve.cntb(i32 31)
   %and_redundant = and i64 %cntb, 511
@@ -44,9 +43,8 @@
 ; CHECK-LABEL: cnth_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    and x9, x8, #0x3ff
-; CHECK-NEXT:    and x8, x8, #0x3fffffffc
-; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    and x9, x8, #0xfc
+; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %cnth = call i64 @llvm.aarch64.sve.cnth(i32 31)
   %and_redundant = and i64 %cnth, 1023
@@ -59,9 +57,8 @@
 ; CHECK-LABEL: cntw_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    and x9, x8, #0x7f
-; CHECK-NEXT:    and x8, x8, #0x3fffffffc
-; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    and x9, x8, #0x7c
+; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %cntw = call i64 @llvm.aarch64.sve.cntw(i32 31)
   %and_redundant = and i64 %cntw, 127
@@ -74,9 +71,8 @@
 ; CHECK-LABEL: cntd_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    and x9, x8, #0x3f
-; CHECK-NEXT:    and x8, x8, #0x3fffffffc
-; CHECK-NEXT:    add x0, x9, x8
+; CHECK-NEXT:    and x9, x8, #0x3c
+; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %cntd = call i64 @llvm.aarch64.sve.cntd(i32 31)
   %and_redundant = and i64 %cntd, 63
@@ -112,8 +108,7 @@
 define i64 @count_bytes_trunc_zext() {
 ; CHECK-LABEL: count_bytes_trunc_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntb x8
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    cntb x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -124,8 +119,7 @@
 define i64 @count_halfs_trunc_zext() {
 ; CHECK-LABEL: count_halfs_trunc_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    cnth x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -136,8 +130,7 @@
 define i64 @count_words_trunc_zext() {
 ; CHECK-LABEL: count_words_trunc_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    cntw x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -148,8 +141,7 @@
 define i64 @count_doubles_trunc_zext() {
 ; CHECK-LABEL: count_doubles_trunc_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    and x0, x8, #0xffffffff
+; CHECK-NEXT:    cntd x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
   %trunc = trunc i64 %cnt to i32
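The trunc/zext tests above exercise the same known bits through a round-trip: zext(trunc i64 -> i32) is equivalent to an AND with 0xffffffff, and a mask that covers every bit that can possibly be set is a no-op. A minimal sketch of that reasoning (not part of the patch; plain C++ standing in for the DAG nodes):

```cpp
#include <cassert>
#include <cstdint>

// trunc i64 -> i32 followed by zext i32 -> i64, written out in plain C++.
uint64_t zext_trunc(uint64_t Cnt) {
  uint32_t Trunc = static_cast<uint32_t>(Cnt); // trunc i64 -> i32
  return static_cast<uint64_t>(Trunc);         // zext i32 -> i64
}

int main() {
  // cntb returns at most 256 (a 2048-bit vector holds 256 bytes), so the
  // round-trip is the identity for every value the intrinsic can produce,
  // and the `and x0, x8, #0xffffffff` the old CHECK lines expected is dead.
  for (uint64_t Cnt = 0; Cnt <= 256; ++Cnt)
    assert(zext_trunc(Cnt) == Cnt);
  return 0;
}
```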
@@ -160,8 +152,7 @@
 define i64 @count_bytes_trunc_sext() {
 ; CHECK-LABEL: count_bytes_trunc_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntb x8
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    cntb x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -172,8 +163,7 @@
 define i64 @count_halfs_trunc_sext() {
 ; CHECK-LABEL: count_halfs_trunc_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cnth x8
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    cnth x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -184,8 +174,7 @@
 define i64 @count_words_trunc_sext() {
 ; CHECK-LABEL: count_words_trunc_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntw x8
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    cntw x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
   %trunc = trunc i64 %cnt to i32
@@ -196,8 +185,7 @@
 define i64 @count_doubles_trunc_sext() {
 ; CHECK-LABEL: count_doubles_trunc_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    sxtw x0, w8
+; CHECK-NEXT:    cntd x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
   %trunc = trunc i64 %cnt to i32
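The sext variants fold for the same underlying reason: the count fits in at most 9 bits, so bit 31 of the truncated i32 is known zero, sign-extension copies zeros, and the sxtw is equivalent to the zext round-trip, i.e. the identity. A companion sketch (again not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

// trunc i64 -> i32 followed by sext i32 -> i64, written out in plain C++.
uint64_t sext_trunc(uint64_t Cnt) {
  int32_t Trunc = static_cast<int32_t>(Cnt);        // trunc i64 -> i32
  return static_cast<uint64_t>(
      static_cast<int64_t>(Trunc));                 // sext i32 -> i64
}

int main() {
  // The sign bit of the i32 is bit 31; the count needs at most 9 bits, so
  // that bit is always clear and sext behaves like zext: the `sxtw x0, w8`
  // the old CHECK lines expected disappears.
  for (uint64_t Cnt = 0; Cnt <= 256; ++Cnt)
    assert(sext_trunc(Cnt) == Cnt);
  return 0;
}
```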