Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3785,6 +3785,12 @@
                                          APInt &UndefElts,
                                          unsigned Depth = 0) const;
 
+  /// Returns true if the given Opc is considered a canonical constant for the
+  /// target, which should not be transformed back into a BUILD_VECTOR.
+  virtual bool isTargetCanonicalConstantNode(unsigned Opc, EVT VT) const {
+    return Opc == ISD::SPLAT_VECTOR;
+  }
+
   struct DAGCombinerInfo {
     void *DC;  // The DAG Combiner object.
     CombineLevel Level;
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2574,7 +2574,8 @@
 
   // If we know the value of all of the demanded bits, return this as a
   // constant.
-  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+  if (!isTargetCanonicalConstantNode(Op.getOpcode(), VT) &&
+      DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
     // Avoid folding to a constant if any OpaqueConstant is involved.
     const SDNode *N = Op.getNode();
     for (SDNode *Op :
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1128,6 +1128,11 @@
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  bool isTargetCanonicalConstantNode(unsigned Opc, EVT VT) const override {
+    return Opc == AArch64ISD::DUP ||
+           TargetLowering::isTargetCanonicalConstantNode(Opc, VT);
+  }
+
   // Normally SVE is only used for byte size vectors that do not fit within a
   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
   // used for 64bit and 128bit vectors as well.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1805,11 +1805,21 @@
 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
 /// Mask are known to be either zero or one and return them Known.
 void AArch64TargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, KnownBits &Known,
-    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, unsigned Depth) const {
   switch (Op.getOpcode()) {
   default:
     break;
+  case AArch64ISD::DUP: {
+    SDValue SrcOp = Op.getOperand(0);
+    Known = DAG.computeKnownBits(SrcOp, Depth + 1);
+    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
+      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
+             "Expected DUP implicit truncation");
+      Known = Known.trunc(Op.getScalarValueSizeInBits());
+    }
+    break;
+  }
   case AArch64ISD::CSEL: {
     KnownBits Known2;
     Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
@@ -14828,6 +14838,10 @@
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
   bool IsStrict = N0->isStrictFPOpcode();
 
+  // extract(dup x) -> x
+  if (N0.getOpcode() == AArch64ISD::DUP)
+    return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
   // Rewrite for pairwise fadd pattern
   //   (f32 (extract_vector_elt
   //           (fadd (vXf32 Other)
Index: llvm/test/CodeGen/AArch64/arm64-build-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -57,8 +57,8 @@
 ; CHECK-LABEL: widen_f16_build_vector:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #13294
-; CHECK-NEXT:    dup.4h v0, w8
-; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    movk w8, #13294, lsl #16
+; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
   %1 = bitcast half* %addr to <2 x half>*
   store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %1, align 2
Index: llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -164,10 +164,9 @@
 ; CHECK-LABEL: testRightBad4x16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    ushr.4h v1, v1, #14
 ; CHECK-NEXT:    dup.4h v2, w8
 ; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
+; CHECK-NEXT:    usra.4h v0, v1, #14
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
@@ -224,10 +223,9 @@
 ; CHECK-LABEL: testRightBad8x16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    ushr.8h v1, v1, #14
 ; CHECK-NEXT:    dup.8h v2, w8
 ; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
+; CHECK-NEXT:    usra.8h v0, v1, #14
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>
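
Note (not part of the patch): below is a minimal sketch of how another backend could adopt the new TargetLowering::isTargetCanonicalConstantNode hook added above, assuming a hypothetical target with its own scalar-to-vector splat opcode. The names MyTargetISD::SPLAT and MyTargetTargetLowering are placeholders, not existing LLVM identifiers; the override simply mirrors the AArch64ISD::DUP case so that SimplifyDemandedBits keeps the splat node intact instead of folding it back into a BUILD_VECTOR once all demanded bits are known.

// Sketch only: hypothetical backend, mirroring the AArch64 override in this patch.
#include "llvm/CodeGen/TargetLowering.h"

namespace MyTargetISD {
// Placeholder target-specific opcode for a scalar-to-vector splat.
enum NodeType : unsigned { SPLAT = llvm::ISD::BUILTIN_OP_END + 1 };
} // namespace MyTargetISD

class MyTargetTargetLowering : public llvm::TargetLowering {
public:
  using TargetLowering::TargetLowering;

  // Report the target splat as a canonical constant so SimplifyDemandedBits
  // does not rewrite it into a BUILD_VECTOR when all demanded bits are known.
  bool isTargetCanonicalConstantNode(unsigned Opc, llvm::EVT VT) const override {
    return Opc == MyTargetISD::SPLAT ||
           TargetLowering::isTargetCanonicalConstantNode(Opc, VT);
  }
};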