diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10863,6 +10863,39 @@
     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
   }
 
+  // Handle NEON vector reduction instructions that implicitly zero the high
+  // lanes. If we are using a BUILD_VECTOR to explicitly zero the high lanes,
+  // then we can propagate the BUILD_VECTOR away entirely.
+  // (build_vector (extract_vector_elt (umax ...), 0), 0...) -> (umax ...)
+  if (usesOnlyOneConstantValue && NumConstantLanes == NumElts - 1) {
+    SDValue Op0 = Op.getOperand(0);
+    bool ConstantLanesAreZero = false;
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ConstantValue.getNode()))
+      ConstantLanesAreZero = C->isZero();
+    else if (ConstantFPSDNode *C =
+                 dyn_cast<ConstantFPSDNode>(ConstantValue.getNode()))
+      ConstantLanesAreZero = C->isExactlyValue(0.0);
+    if (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && ConstantLanesAreZero) {
+      SDValue Vec = Op0.getOperand(0);
+      SDValue Idx = Op0.getOperand(1);
+      ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx.getNode());
+      // Only fold lane-0 extracts, and only when the reduction node already
+      // has the BUILD_VECTOR's type, so the replacement is type-correct.
+      if (CIdx && CIdx->isZero() && Vec.getValueType() == VT &&
+          (Vec.getOpcode() == AArch64ISD::UMAXV ||
+           Vec.getOpcode() == AArch64ISD::UMINV ||
+           Vec.getOpcode() == AArch64ISD::SMAXV ||
+           Vec.getOpcode() == AArch64ISD::SMINV ||
+           Vec.getOpcode() == AArch64ISD::UADDV ||
+           Vec.getOpcode() == AArch64ISD::SADDV)) {
+        // NOTE: It would be nice to handle FMAXNM/FMINNM here as well, but
+        // they are currently modeled as intrinsics that return scalars,
+        // which prevents this pattern from being matchable.
+        return Vec;
+      }
+    }
+  }
+
   if (AllLanesExtractElt) {
     SDNode *Vector = nullptr;
     bool Even = false;
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-zeroing.ll b/llvm/test/CodeGen/AArch64/vecreduce-zeroing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vecreduce-zeroing.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define dso_local noundef <4 x i32> @umaxv(<4 x i32> noundef %0) local_unnamed_addr #0 {
+; CHECK-LABEL: umaxv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxv s0, v0.4s
+; CHECK-NEXT:    ret
+  %2 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %0)
+  %3 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %2, i64 0
+  ret <4 x i32> %3
+}
+
+define dso_local noundef <4 x i32> @uminv(<4 x i32> noundef %0) local_unnamed_addr #0 {
+; CHECK-LABEL: uminv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminv s0, v0.4s
+; CHECK-NEXT:    ret
+  %2 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %0)
+  %3 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %2, i64 0
+  ret <4 x i32> %3
+}
+
+define dso_local noundef <4 x i32> @smaxv(<4 x i32> noundef %0) local_unnamed_addr #0 {
+; CHECK-LABEL: smaxv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxv s0, v0.4s
+; CHECK-NEXT:    ret
+  %2 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %0)
+  %3 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %2, i64 0
+  ret <4 x i32> %3
+}
+
+define dso_local noundef <4 x i32> @sminv(<4 x i32> noundef %0) local_unnamed_addr #0 {
+; CHECK-LABEL: sminv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminv s0, v0.4s
+; CHECK-NEXT:    ret
+  %2 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %0)
+  %3 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %2, i64 0
+  ret <4 x i32> %3
+}
+
+define dso_local noundef <4 x i32> @addv(<4 x i32> noundef %0) local_unnamed_addr #0 {
+; CHECK-LABEL: addv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    ret
+  %2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+  %3 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %2, i64 0
+  ret <4 x i32> %3
+}
+
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1
+
+attributes #0 = { mustprogress nofree nosync nounwind readnone willreturn uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="128" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.2a" }
+attributes #1 = { nofree nosync nounwind readnone willreturn }