Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -2438,6 +2438,8 @@
   /// elements of the mask are returned as -1.
   static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
 
+  static bool getShuffleMask(Value *Mask, SmallVectorImpl<int> &Result);
+
   /// Return the mask for this instruction as a vector of integers. Undefined
   /// elements of the mask are returned as -1.
   void getShuffleMask(SmallVectorImpl<int> &Result) const {
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -995,6 +995,10 @@
                                 [],
                                 [IntrNoMem]>;
 
+def int_experimental_vector_splatvector : Intrinsic<[llvm_anyvector_ty],
+                                [LLVMVectorElementType<0>],
+                                [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1345,6 +1345,17 @@
       Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
     else
       Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
+
+    if (VT.isScalableVector()) {
+      auto INum = DAG.getConstant(Intrinsic::experimental_vector_splatvector,
+                                  getCurSDLoc(), MVT::i32);
+
+      auto Splat = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VT,
+                               INum, Op);
+
+      return Splat;
+    }
+
     Ops.assign(NumElements, Op);
   }
 
@@ -3153,17 +3164,49 @@
 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   SDValue Src1 = getValue(I.getOperand(0));
   SDValue Src2 = getValue(I.getOperand(1));
+  Value *MaskV = I.getOperand(2);
   SDLoc DL = getCurSDLoc();
-  SmallVector<int, 8> Mask;
-  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
-  unsigned MaskNumElts = Mask.size();
-
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  bool IsScalable = VT.isScalableVector();
   EVT SrcVT = Src1.getValueType();
   unsigned SrcNumElts = SrcVT.getVectorNumElements();
 
+  SmallVector<int, 8> Mask;
+  if (!ShuffleVectorInst::getShuffleMask(MaskV, Mask)) {
+    unsigned NumElts = VT.getVectorNumElements();
+    // We don't currently support variable shuffles on fixed-length vectors.
+    assert(IsScalable && "Non-constant shuffle mask on fixed-length vector");
+
+    // We haven't introduced a vector_shuffle_var intrinsic to support shuffles
+    // where we need to extract or merge vectors.
+    if (NumElts != SrcNumElts)
+      llvm_unreachable("Haven't implemented VECTOR_SHUFFLE_VAR intrinsic yet");
+
+    // Currently only handling splats of a single value for scalable vectors.
+    if (auto *CMask = dyn_cast<Constant>(MaskV))
+      if (CMask->isNullValue()) {
+        // Splat of first element.
+        auto FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                    SrcVT.getScalarType(), Src1,
+                                    DAG.getConstant(0, DL,
+                                        TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+        auto INum = DAG.getConstant(Intrinsic::experimental_vector_splatvector,
+                                    DL, MVT::i32);
+
+        auto Splat = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                                 INum, FirstElt);
+
+        setValue(&I, Splat);
+        return;
+      }
+
+    llvm_unreachable("Haven't implemented VECTOR_SHUFFLE_VAR intrinsic yet");
+  }
+
+  unsigned MaskNumElts = Mask.size();
 
   if (SrcNumElts == MaskNumElts) {
     setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
     return;
Index: lib/IR/ConstantFold.cpp
===================================================================
--- lib/IR/ConstantFold.cpp
+++ lib/IR/ConstantFold.cpp
@@ -796,8 +796,9 @@
 
   if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
     // ee({w,x,y,z}, wrong_value) -> undef
-    if (CIdx->uge(Val->getType()->getVectorNumElements()))
-      return UndefValue::get(Val->getType()->getVectorElementType());
+    if (!Val->getType()->getVectorIsScalable())
+      if (CIdx->uge(Val->getType()->getVectorNumElements()))
+        return UndefValue::get(Val->getType()->getVectorElementType());
     return Val->getAggregateElement(CIdx->getZExtValue());
   }
   return nullptr;
@@ -809,6 +810,10 @@
   if (isa<UndefValue>(Idx))
     return UndefValue::get(Val->getType());
 
+  // Everything after this point assumes you can iterate across Val.
+  if (Val->getType()->getVectorIsScalable())
+    return nullptr;
+
   ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
   if (!CIdx)
     return nullptr;
@@ -836,7 +841,8 @@
 Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
                                                      Constant *V2,
                                                      Constant *Mask) {
-  unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+  auto *MaskTy = cast<VectorType>(Mask->getType());
+  auto MaskNumElts = MaskTy->getElementCount();
   Type *EltTy = V1->getType()->getVectorElementType();
 
   // Undefined shuffle mask -> undefined value.
@@ -846,11 +852,23 @@
   // Don't break the bitcode reader hack.
   if (isa<ConstantExpr>(Mask)) return nullptr;
 
+  if (MaskTy->isScalable()) {
+    // Is splat?
+    if (Mask->isNullValue()) {
+      Constant *Zero = Constant::getNullValue(MaskTy->getElementType());
+      Constant *SplatVal = ConstantFoldExtractElementInstruction(V1, Zero);
+      // Is splat of zero?
+      if (SplatVal && SplatVal->isNullValue())
+        return Constant::getNullValue(VectorType::get(EltTy, MaskNumElts));
+    }
+    return nullptr;
+  }
+
   unsigned SrcNumElts = V1->getType()->getVectorNumElements();
 
   // Loop over the shuffle mask, evaluating each element.
   SmallVector<Constant*, 32> Result;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
+  for (unsigned i = 0; i != MaskNumElts.Min; ++i) {
     int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
     if (Elt == -1) {
       Result.push_back(UndefValue::get(EltTy));
Index: lib/IR/Constants.cpp
===================================================================
--- lib/IR/Constants.cpp
+++ lib/IR/Constants.cpp
@@ -2093,8 +2093,9 @@
     return FC;          // Fold a few common cases.
 
   unsigned NElts = Mask->getType()->getVectorNumElements();
+  bool Scalable = Mask->getType()->getVectorIsScalable();
   Type *EltTy = V1->getType()->getVectorElementType();
-  Type *ShufTy = VectorType::get(EltTy, NElts);
+  Type *ShufTy = VectorType::get(EltTy, NElts, Scalable);
   if (OnlyIfReducedTy == ShufTy)
     return nullptr;
 
Index: lib/IR/ConstantsContext.h
===================================================================
--- lib/IR/ConstantsContext.h
+++ lib/IR/ConstantsContext.h
@@ -150,7 +150,7 @@
   ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
     : ConstantExpr(VectorType::get(
                        cast<VectorType>(C1->getType())->getElementType(),
-                       cast<VectorType>(C3->getType())->getNumElements()),
+                       cast<VectorType>(C3->getType())->getElementCount()),
                    Instruction::ShuffleVector,
                    &Op<0>(), 3) {
     Op<0>() = C1;
Index: lib/IR/Instructions.cpp
===================================================================
--- lib/IR/Instructions.cpp
+++ lib/IR/Instructions.cpp
@@ -1580,7 +1580,7 @@
                                      const Twine &Name,
                                      Instruction *InsertBefore)
 : Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
-                              cast<VectorType>(Mask->getType())->getNumElements()),
+                              cast<VectorType>(Mask->getType())->getElementCount()),
               ShuffleVector,
               OperandTraits<ShuffleVectorInst>::op_begin(this),
               OperandTraits<ShuffleVectorInst>::operands(this),
@@ -1597,7 +1597,7 @@
                                      const Twine &Name,
                                      BasicBlock *InsertAtEnd)
 : Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
-                              cast<VectorType>(Mask->getType())->getNumElements()),
+                              cast<VectorType>(Mask->getType())->getElementCount()),
               ShuffleVector,
               OperandTraits<ShuffleVectorInst>::op_begin(this),
               OperandTraits<ShuffleVectorInst>::operands(this),
@@ -1684,6 +1684,20 @@
   }
 }
 
+bool ShuffleVectorInst::getShuffleMask(Value *Mask,
+                                       SmallVectorImpl<int> &Result) {
+  VectorType *VecTy = cast<VectorType>(Mask->getType());
+  if (VecTy->isScalable())
+    return false;
+
+  if (auto *CMask = dyn_cast<Constant>(Mask)) {
+    getShuffleMask(CMask, Result);
+    return true;
+  }
+
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 //                             InsertValueInst Class
 //===----------------------------------------------------------------------===//
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -609,6 +609,7 @@
   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -626,6 +626,8 @@
   setHasExtractBitsInsn(true);
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
 
   if (Subtarget->hasNEON()) {
     // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
@@ -2682,6 +2684,8 @@
                             DAG.getConstant(1, dl, ScalarVT));
     return SeriesVec;
   }
+  case Intrinsic::experimental_vector_splatvector:
+    return LowerSPLAT_VECTOR(Op, DAG);
   }
 }
 
@@ -6424,6 +6428,35 @@
   return GenerateTBL(Op, ShuffleMask, DAG);
 }
 
+SDValue
+AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
+                                         SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  EVT ElemVT = VT.getScalarType();
+
+  SDValue SplatVal = Op.getOperand(1);
+
+  switch (ElemVT.getSimpleVT().SimpleTy) {
+  case MVT::i8:
+  case MVT::i16:
+    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
+    break;
+  case MVT::i64:
+    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
+    break;
+  case MVT::i32:
+    // Fine as is.
+    break;
+  default:
+    // The other types are not handled yet: i1 will be lowered to
+    // ptrue/pfalse, and the fp types can be lowered to unpacked
+    // vectors, so they should just pass through.
+    llvm_unreachable("Unsupported SPLAT_VECTOR input operand type");
+  }
+
+  return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+}
+
 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
                                APInt &UndefBits) {
   EVT VT = BVN->getValueType(0);
@@ -11210,6 +11244,13 @@
   case ISD::ATOMIC_CMP_SWAP:
     ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
     return;
+  case ISD::INTRINSIC_WO_CHAIN:
+    // This is needed because the operation action for INTRINSIC_WO_CHAIN
+    // was set to 'Custom' for i8 and i16 in order to legalize the stepvector
+    // and splatvector intrinsics. We don't yet need to legalize the results,
+    // but this is where that would happen until the handling is promoted to
+    // generic code.
+    return;
   }
 }
Index: lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -491,6 +491,11 @@
   // Unoptimized vscale
   def : Pat<(AArch64vscale (simm6_32b:$imm)), (RDVLI_XI $imm)>;
 
+  // Duplicate GPR in all vector elements
+  def : Pat<(nxv16i8 (AArch64dup GPR32:$a)), (DUP_ZR_B $a)>;
+  def : Pat<(nxv8i16 (AArch64dup GPR32:$a)), (DUP_ZR_H $a)>;
+  def : Pat<(nxv4i32 (AArch64dup GPR32:$a)), (DUP_ZR_S $a)>;
+  def : Pat<(nxv2i64 (AArch64dup GPR64:$a)), (DUP_ZR_D $a)>;
   // Unoptimized seriesvector patterns
   def : Pat<(nxv16i8 (AArch64seriesvec GPR32:$start, GPR32:$step)),
Index: test/CodeGen/AArch64/SVE/dup.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/SVE/dup.ll
@@ -0,0 +1,40 @@
+; RUN: llc -verify-machineinstrs -mattr=+sve < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+define <vscale x 16 x i8> @dup_b(i8 %val) {
+; CHECK-LABEL: dup_b:
+; CHECK: mov z0.b, w0
+; CHECK-NEXT: ret
+  %insert = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %insert, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i8> %splat
+}
+
+define <vscale x 8 x i16> @dup_h(i16 %val) {
+; CHECK-LABEL: dup_h:
+; CHECK: mov z0.h, w0
+; CHECK-NEXT: ret
+  %insert = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %insert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 4 x i32> @dup_s(i32 %val) {
+; CHECK-LABEL: dup_s:
+; CHECK: mov z0.s, w0
+; CHECK-NEXT: ret
+  %insert = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %insert, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 2 x i64> @dup_d(i64 %val) {
+; CHECK-LABEL: dup_d:
+; CHECK: mov z0.d, x0
+; CHECK-NEXT: ret
+  %insert = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %insert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i64> %splat
+}
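
Note for reviewers: the splat idiom exercised by dup.ll above (insertelement
into undef at lane 0, then shufflevector with an all-zero mask) can be
produced from C++ roughly as follows. This is a minimal sketch, not part of
the patch; the helper name emitScalableSplat is invented for illustration,
and it assumes the three-operand VectorType::get(EltTy, NElts, Scalable)
overload used in the Constants.cpp hunk above.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper: build the scalable-splat idiom recognized by
// visitShuffleVector and ConstantFoldShuffleVectorInstruction, i.e.
//   %insert = insertelement <vscale x N x T> undef, T %val, i32 0
//   %splat  = shufflevector %insert, undef, zeroinitializer
static Value *emitScalableSplat(IRBuilder<> &B, Value *Val, unsigned MinElts) {
  // Assumes the VectorType::get overload with a Scalable flag from this
  // patch series.
  auto *VecTy = VectorType::get(Val->getType(), MinElts, /*Scalable=*/true);
  Value *Insert = B.CreateInsertElement(UndefValue::get(VecTy), Val,
                                        B.getInt32(0));
  // An all-zero mask selects lane 0 of the first operand for every result
  // lane, which the folding and SelectionDAG changes above treat as a splat.
  auto *MaskTy = VectorType::get(B.getInt32Ty(), MinElts, /*Scalable=*/true);
  return B.CreateShuffleVector(Insert, UndefValue::get(VecTy),
                               Constant::getNullValue(MaskTy));
}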