Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3785,6 +3785,12 @@
                                          APInt &UndefElts,
                                          unsigned Depth = 0) const;
 
+  /// Returns true if the given Opc is considered a canonical constant for the
+  /// target, which should not be transformed back into a BUILD_VECTOR.
+  virtual bool isTargetCanonicalConstantNode(unsigned Opc, EVT VT) const {
+    return Opc == ISD::SPLAT_VECTOR;
+  }
+
   struct DAGCombinerInfo {
     void *DC;  // The DAG Combiner object.
     CombineLevel Level;
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2574,7 +2574,8 @@
 
   // If we know the value of all of the demanded bits, return this as a
   // constant.
-  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+  if (!isTargetCanonicalConstantNode(Op.getOpcode(), VT) &&
+      DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
     // Avoid folding to a constant if any OpaqueConstant is involved.
     const SDNode *N = Op.getNode();
     for (SDNode *Op :
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1128,6 +1128,11 @@
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const override;
 
+  bool isTargetCanonicalConstantNode(unsigned Opc, EVT VT) const override {
+    return Opc == AArch64ISD::DUP ||
+           TargetLowering::isTargetCanonicalConstantNode(Opc, VT);
+  }
+
   // Normally SVE is only used for byte size vectors that do not fit within a
   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
   // used for 64bit and 128bit vectors as well.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1805,11 +1805,21 @@
 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
 /// Mask are known to be either zero or one and return them Known.
 void AArch64TargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, KnownBits &Known,
-    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, unsigned Depth) const {
   switch (Op.getOpcode()) {
   default:
     break;
+  case AArch64ISD::DUP: {
+    SDValue SrcOp = Op.getOperand(0);
+    Known = DAG.computeKnownBits(SrcOp, Depth + 1);
+    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
+      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
+             "Expected DUP implicit truncation");
+      Known = Known.trunc(Op.getScalarValueSizeInBits());
+    }
+    break;
+  }
   case AArch64ISD::CSEL: {
     KnownBits Known2;
     Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
@@ -14828,6 +14838,10 @@
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
   bool IsStrict = N0->isStrictFPOpcode();
 
+  // extract(dup x) -> x
+  if (N0.getOpcode() == AArch64ISD::DUP)
+    return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+
   // Rewrite for pairwise fadd pattern
   //   (f32 (extract_vector_elt
   //           (fadd (vXf32 Other)
Index: llvm/test/CodeGen/AArch64/arm64-build-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -57,8 +57,8 @@
 ; CHECK-LABEL: widen_f16_build_vector:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #13294
-; CHECK-NEXT:    dup.4h v0, w8
-; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    movk w8, #13294, lsl #16
+; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
   %1 = bitcast half* %addr to <2 x half>*
   store <2 x half> <half 0xH33EE, half 0xH33EE>, <2 x half>* %1, align 2
Index: llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -164,10 +164,9 @@
 ; CHECK-LABEL: testRightBad4x16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    ushr.4h v1, v1, #14
 ; CHECK-NEXT:    dup.4h v2, w8
 ; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
+; CHECK-NEXT:    usra.4h v0, v1, #14
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
@@ -224,10 +223,9 @@
 ; CHECK-LABEL: testRightBad8x16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    ushr.8h v1, v1, #14
 ; CHECK-NEXT:    dup.8h v2, w8
 ; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
+; CHECK-NEXT:    usra.8h v0, v1, #14
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>
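
Note (not part of the patch): below is a minimal sketch of how another backend could adopt the new TargetLowering::isTargetCanonicalConstantNode hook added above, assuming a hypothetical target with its own scalar-to-vector splat opcode. The names MyTargetISD::SPLAT and MyTargetTargetLowering are placeholders, not existing LLVM identifiers; the override simply mirrors the AArch64ISD::DUP case so that SimplifyDemandedBits keeps the splat node intact instead of folding it back into a BUILD_VECTOR once all demanded bits are known.

// Sketch only: hypothetical backend, mirroring the AArch64 override in this patch.
#include "llvm/CodeGen/TargetLowering.h"

namespace MyTargetISD {
// Placeholder target-specific opcode for a scalar-to-vector splat.
enum NodeType : unsigned { SPLAT = llvm::ISD::BUILTIN_OP_END + 1 };
} // namespace MyTargetISD

class MyTargetTargetLowering : public llvm::TargetLowering {
public:
  using TargetLowering::TargetLowering;

  // Report the target splat as a canonical constant so SimplifyDemandedBits
  // does not rewrite it into a BUILD_VECTOR when all demanded bits are known.
  bool isTargetCanonicalConstantNode(unsigned Opc, llvm::EVT VT) const override {
    return Opc == MyTargetISD::SPLAT ||
           TargetLowering::isTargetCanonicalConstantNode(Opc, VT);
  }
};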