Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2313,6 +2313,11 @@
 def mbranch_protection_EQ : Joined<["-"], "mbranch-protection=">,
   HelpText<"Enforce targets of indirect branches and function returns">;
 
+def msve_vector_bits : Joined<["-"], "msve-vector-bits=">,
+  Group<m_aarch64_Features_Group>,
+  HelpText<"Specify the size in bits of an SVE vector register."
+           " Has no effect unless SVE is enabled. (Default is \"scalable\")">;
+
 def msimd128 : Flag<["-"], "msimd128">, Group<m_wasm_Features_Group>;
 def munimplemented_simd128 : Flag<["-"], "munimplemented-simd128">, Group<m_wasm_Features_Group>;
 def mno_unimplemented_simd128 : Flag<["-"], "mno-unimplemented-simd128">, Group<m_wasm_Features_Group>;
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1751,6 +1751,22 @@
     if (IndirectBranches)
       CmdArgs.push_back("-mbranch-target-enforce");
   }
+
+  if (any_of(CmdArgs,
+             [](const char *Arg) { return (strcmp(Arg, "+sve") == 0 ||
+                                           strcmp(Arg, "+sve2") == 0); })) {
+    if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits)) {
+      StringRef Bits = A->getValue();
+      if (Bits != "scalable") {
+        CmdArgs.push_back("-mllvm");
+        CmdArgs.push_back(Args.MakeArgString("-aarch64-sve-vector-bits-min=" +
+                                             Bits));
+        //CmdArgs.push_back("-mllvm");
+        //CmdArgs.push_back(Args.MakeArgString("-aarch64-sve-vector-bits-max="
+        //                                     + Bits));
+      }
+    }
+  }
 }
 
 void Clang::AddMIPSTargetArgs(const ArgList &Args,
Index: llvm/include/llvm/CodeGen/ValueTypes.h
===================================================================
--- llvm/include/llvm/CodeGen/ValueTypes.h
+++ llvm/include/llvm/CodeGen/ValueTypes.h
@@ -163,6 +163,12 @@
     return V.isScalableVector();
   }
 
+  /// Return true if this is a vector type where the runtime
+  /// length is machine independent.
+  bool isFixedVector() const {
+    return isVector() && !isScalableVector();
+  }
+
   /// Return true if this is a 16-bit vector type.
   bool is16BitVector() const {
     return isSimple() ? V.is16BitVector() : isExtended16BitVector();
Index: llvm/include/llvm/Support/MachineValueType.h
===================================================================
--- llvm/include/llvm/Support/MachineValueType.h
+++ llvm/include/llvm/Support/MachineValueType.h
@@ -324,6 +324,12 @@
             SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
   }
 
+  /// Return true if this is a vector value type where the
+  /// runtime length is machine independent.
+  bool isFixedVector() const {
+    return isVector() && !isScalableVector();
+  }
+
   /// Return true if this is a 16-bit vector type.
   bool is16BitVector() const {
     return (SimpleTy == MVT::v2i8  || SimpleTy == MVT::v1i16 ||
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19440,7 +19440,8 @@
 
   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
   // with a VECTOR_SHUFFLE and possible truncate.
-  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+  if ((InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+      !VT.isScalableVector()) {
     SDValue InVec = InVal->getOperand(0);
     SDValue EltNo = InVal->getOperand(1);
     auto InVecT = InVec.getValueType();
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1388,12 +1388,15 @@
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  bool IsConcat = Node->getOpcode() == ISD::CONCAT_VECTORS;
+
   // We can't handle this case efficiently.  Allocate a sufficiently
-  // aligned object on the stack, store each element into it, then load
+  // aligned object on the stack, store each operand into it, then load
   // the result as a vector.
   // Create the stack frame object.
   EVT VT = Node->getValueType(0);
-  EVT EltVT = VT.getVectorElementType();
+  EVT MemVT = IsConcat ? Node->getOperand(0).getValueType() :
+                         VT.getVectorElementType();
   SDLoc dl(Node);
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1402,7 +1405,7 @@
 
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
-  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
   assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
   // Store (in the right endianness) the elements to memory.
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -1416,10 +1419,10 @@
     // If the destination vector element type is narrower than the source
     // element type, only store the bits necessary.
-    if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+    if (MemVT.bitsLT(Node->getOperand(i).getValueType())) {
       Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
                                          Node->getOperand(i), Idx,
-                                         PtrInfo.getWithOffset(Offset), EltVT));
+                                         PtrInfo.getWithOffset(Offset), MemVT));
     } else
       Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
                                     Node->getOperand(i), Idx,
                                     PtrInfo.getWithOffset(Offset)));
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2481,6 +2481,9 @@
   if (Depth >= MaxRecursionDepth)
     return Known;  // Limit search depth.
 
+  if (Op.getValueType().isScalableVector())
+    return Known; // Unknown number of elts so assume we don't know anything.
+
   KnownBits Known2;
   unsigned NumElts = DemandedElts.getBitWidth();
   assert((!Op.getValueType().isVector() ||
@@ -5364,7 +5367,7 @@
     assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
            "Extract subvector must be from larger vector to smaller vector!");
 
-    if (N2C) {
+    if (N2C && !N1.getValueType().isScalableVector()) {
       assert((VT.getVectorNumElements() + N2C->getZExtValue()
               <= N1.getValueType().getVectorNumElements()) &&
              "Extract subvector overflow!");
@@ -5571,7 +5574,9 @@
            "Dest and insert subvector source types must match!");
     assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
            "Insert subvector must be from smaller vector to larger vector!");
-    if (isa<ConstantSDNode>(Index)) {
+    if (isa<ConstantSDNode>(Index) &&
+        !N1.getValueType().isScalableVector() &&
+        !N2.getValueType().isScalableVector()) {
       assert((N2.getValueType().getVectorNumElements() +
               cast<ConstantSDNode>(Index)->getZExtValue()
               <= VT.getVectorNumElements())
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -879,6 +879,8 @@
   }
   case ISD::INSERT_SUBVECTOR: {
     SDValue Base = Op.getOperand(0);
+    if (Base.getValueType().isScalableVector())
+      break;
     SDValue Sub = Op.getOperand(1);
     EVT SubVT = Sub.getValueType();
     unsigned NumSubElts = SubVT.getVectorNumElements();
@@ -919,6 +921,8 @@
   case ISD::EXTRACT_SUBVECTOR: {
     // If index isn't constant, assume we need all the source vector elements.
     SDValue Src = Op.getOperand(0);
+    if (Src.getValueType().isScalableVector())
+      break;
     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
@@ -2276,6 +2280,8 @@
     if (!isa<ConstantSDNode>(Op.getOperand(2)))
       break;
     SDValue Base = Op.getOperand(0);
+    if (Base.getValueType().isScalableVector())
+      break;
     SDValue Sub = Op.getOperand(1);
     EVT SubVT = Sub.getValueType();
     unsigned NumSubElts = SubVT.getVectorNumElements();
@@ -2308,6 +2314,8 @@
   }
   case ISD::EXTRACT_SUBVECTOR: {
     SDValue Src = Op.getOperand(0);
+    if (Src.getValueType().isScalableVector())
+      break;
     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2977,6 +2977,49 @@
   ReplaceNode(N, N3);
 }
 
+static SDNode* ExtractFromSVEReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(VT.isFixedVector() && "Expected fixed length vector type!");
+  SDLoc DL(V);
+  switch (VT.getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
+static SDNode* InsertIntoSVEReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(V.getValueType().isFixedVector() &&
+         "Expected fixed length vector type!");
+  SDLoc DL(V);
+  switch (V.getValueType().getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3050,6 +3093,148 @@
     return;
     break;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    // Bail when not a "cast" like extract_subvector.
+    auto *LaneNode = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!LaneNode || (LaneNode->getZExtValue() != 0))
+      break;
+
+    // Bail when normal isel can do the job.
+    EVT InVT = Node->getOperand(0).getValueType();
+    if (VT.isScalableVector() || !InVT.isScalableVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+
+    ReplaceNode(Node, ExtractFromSVEReg(CurDAG, VT, Node->getOperand(0)));
+    return;
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    // Bail when not a "cast" like insert_subvector.
+    auto *LaneNode = dyn_cast<ConstantSDNode>(Node->getOperand(2));
+    if (!LaneNode || (LaneNode->getZExtValue() != 0))
+      break;
+
+    // Bail when normal isel should do the job.
+    EVT InVT = Node->getOperand(1).getValueType();
+    if (!VT.isScalableVector() || InVT.isScalableVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+
+    // Bail when inserting into real data. (HACK)
+    if (!Node->getOperand(0).isUndef())
+      break;
+
+    ReplaceNode(Node, InsertIntoSVEReg(CurDAG, VT, Node->getOperand(1)));
+    return;
+  }
+
+  case ISD::TRUNCATE: {
+    EVT InVT = Node->getOperand(0).getValueType();
+    // Only intercept combinations not handled by tablegen based isel patterns.
+    if (!VT.isVector() || VT.isScalableVector())
+      break;
+    if ((VT.getSizeInBits() == 64) && (InVT.getSizeInBits() == 128))
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    SDLoc DL(Node);
+
+    EVT ContainerVT;
+    switch (InVT.getVectorElementType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("unimplemented container type");
+    case MVT::i16: ContainerVT = EVT(MVT::nxv8i16); break;
+    case MVT::i32: ContainerVT = EVT(MVT::nxv4i32); break;
+    case MVT::i64: ContainerVT = EVT(MVT::nxv2i64); break;
+    }
+
+    // Repeatedly truncate operand until the result is of the desired type.
+    auto Op = InsertIntoSVEReg(CurDAG, ContainerVT, Node->getOperand(0));
+    while (ContainerVT.getVectorElementType() != VT.getVectorElementType()) {
+      unsigned Opc;
+      switch (ContainerVT.getSimpleVT().SimpleTy) {
+      default:
+        llvm_unreachable("unimplemented container type");
+      case MVT::nxv8i16:
+        Opc = AArch64::UZP1_ZZZ_B;
+        ContainerVT = MVT::nxv16i8;
+        break;
+      case MVT::nxv4i32:
+        Opc = AArch64::UZP1_ZZZ_H;
+        ContainerVT = MVT::nxv8i16;
+        break;
+      case MVT::nxv2i64:
+        Opc = AArch64::UZP1_ZZZ_S;
+        ContainerVT = MVT::nxv4i32;
+        break;
+      }
+
+      SDValue Src = SDValue(Op, 0);
+      Op = CurDAG->getMachineNode(Opc, DL, ContainerVT, Src, Src);
+    }
+
+    ReplaceNode(Node, ExtractFromSVEReg(CurDAG, VT, SDValue(Op, 0)));
+    return;
+  }
+
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND: {
+    EVT InVT = Node->getOperand(0).getValueType();
+    // Only intercept combinations not handled by tablegen based isel patterns.
+    if (!VT.isVector() || VT.isScalableVector())
+      break;
+    if ((VT.getSizeInBits() == 128) && (InVT.getSizeInBits() == 64))
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    SDLoc DL(Node);
+
+    EVT ContainerVT;
+    switch (InVT.getVectorElementType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("unimplemented container type");
+    case MVT::i8:  ContainerVT = EVT(MVT::nxv16i8); break;
+    case MVT::i16: ContainerVT = EVT(MVT::nxv8i16); break;
+    case MVT::i32: ContainerVT = EVT(MVT::nxv4i32); break;
+    }
+
+    // Repeatedly zero-extend operand until the result is of the desired type.
+    auto Op = InsertIntoSVEReg(CurDAG, ContainerVT, Node->getOperand(0));
+    while (ContainerVT.getVectorElementType() != VT.getVectorElementType()) {
+      unsigned Opc;
+      switch (ContainerVT.getSimpleVT().SimpleTy) {
+      default:
+        llvm_unreachable("unimplemented container type");
+      case MVT::nxv16i8:
+        Opc = AArch64::UUNPKLO_ZZ_H;
+        ContainerVT = MVT::nxv8i16;
+        break;
+      case MVT::nxv8i16:
+        Opc = AArch64::UUNPKLO_ZZ_S;
+        ContainerVT = MVT::nxv4i32;
+        break;
+      case MVT::nxv4i32:
+        Opc = AArch64::UUNPKLO_ZZ_D;
+        ContainerVT = MVT::nxv2i64;
+        break;
+      }
+
+      Op = CurDAG->getMachineNode(Opc, DL, ContainerVT, SDValue(Op, 0));
+    }
+
+    ReplaceNode(Node, ExtractFromSVEReg(CurDAG, VT, SDValue(Op, 0)));
+    return;
+  }
+
   case ISD::Constant: {
     // Materialize zero constants as copies from WZR/XZR. This allows
     // the coalescer to propagate these into other instructions.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -331,6 +331,7 @@
 
   /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  SDValue LowerOperationDefault(SDValue Op, SelectionDAG &DAG) const;
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
@@ -610,6 +611,13 @@
                       bool isVarArg) const override;
   /// Used for exception handling on Win64.
   bool needsFixedCatchObjects() const override;
+
+  bool
+  shouldExpandBuildVectorWithShuffles(EVT VT,
+                                      unsigned DefinedValues) const override;
+
+  bool useSVEForFixedLengthVectors() const;
+
 private:
   /// Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
@@ -618,6 +626,7 @@
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
   void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
+  void addTypeForFixedLengthSVE(MVT VT);
   void addDRTypeForNEON(MVT VT);
   void addQRTypeForNEON(MVT VT);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -104,6 +104,11 @@
                                   cl::desc("Allow AArch64 SLI/SRI formation"),
                                   cl::init(false));
 
+static cl::opt<bool>
+UseSVEForAll("aarch64-stress-fixed-length-sve", cl::Hidden,
+             cl::desc("Use SVE for fixed length vectors that would ordinarily use NEON"),
+             cl::init(true));
+
 // FIXME: The necessary dtprel relocations don't seem to be supported
 // well in the GNU bfd and gold linkers at the moment. Therefore, by
 // default, for now, fall back to GeneralDynamic code generation.
@@ -183,6 +188,25 @@
     addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
     addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
 
+    if (useSVEForFixedLengthVectors()) {
+      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
+        // Make everything that can fit within an SVE register legal, unless...
+        if (VT.getSizeInBits() <= STI.getMinSVEVectorSizeInBits())
+          // Fixed length predicates are promoted to i8.
+          if ((VT.getVectorElementType() != MVT::i1) && VT.isPow2VectorType())
+            // Ensure Neon MVTs only belong to a single register class.
+            if (VT.getSizeInBits() > 128)
+              addRegisterClass(VT, &AArch64::ZPRRegClass);
+
+      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
+        // Make everything that can fit within an SVE register legal, unless...
+        if (VT.getSizeInBits() <= STI.getMinSVEVectorSizeInBits())
+          if (VT.isPow2VectorType())
+            // Ensure Neon MVTs only belong to a single register class.
+            if (VT.getSizeInBits() > 128)
+              addRegisterClass(VT, &AArch64::ZPRRegClass);
+    }
+
     for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
       setOperationAction(ISD::SADDSAT, VT, Legal);
       setOperationAction(ISD::UADDSAT, VT, Legal);
@@ -834,6 +858,14 @@
     }
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+
+    // See TODO in addTypeForFixedLengthSVE for why this is here.
+    if (useSVEForFixedLengthVectors()) {
+      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
+        addTypeForFixedLengthSVE(VT);
+      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
+        addTypeForFixedLengthSVE(VT);
+    }
   }
 
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -918,6 +950,46 @@
   }
 }
 
+void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
+  assert(VT.isFixedVector() && "Expected fixed length vector type!");
+
+// TODO: Enable this when we can be called before computeRegisterProperties.
+//  // Make everything that can fit within an SVE register legal, unless...
+//  if (VT.getSizeInBits() <= Subtarget->getMinSVEVectorSizeInBits())
+//    // Fixed length predicates are promoted to i8.
+//    if ((VT.getVectorElementType() != MVT::i1) && VT.isPow2VectorType())
+//      // Ensure Neon MVTs only belong to a single register class.
+//      if (VT.getSizeInBits() > 128)
+//        addRegisterClass(VT, &AArch64::ZPRRegClass);
+
+  // By default everything requires expanding.
+  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
+    setOperationAction(Op, VT, Expand);
+
+  for (MVT InnerVT : MVT::integer_fixedlen_vector_valuetypes()) {
+    setTruncStoreAction(VT, InnerVT, Expand);
+    setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+  }
+
+  // Use common code for all type legalisation.
+  if (!isTypeLegal(VT))
+    return;
+
+  // These are used to legalise operations and represent the only operations
+  // that are legal to operate on fixed length vectors.
+  setOperationAction(ISD::ANY_EXTEND, VT, Legal);
+  setOperationAction(ISD::TRUNCATE, VT, Legal);
+  setOperationAction(ISD::ZERO_EXTEND, VT, Legal);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+
+  // Lower fixed length vector operations to scalable equivalents.
+  setOperationAction(ISD::LOAD, VT, Custom);
+  setOperationAction(ISD::STORE, VT, Custom);
+}
+
 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
   addRegisterClass(VT, &AArch64::FPR64RegClass);
   addTypeForNEON(VT, MVT::v2i32);
@@ -3041,8 +3113,8 @@
   return SDValue();
 }
 
-SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
-                                              SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerOperationDefault(SDValue Op,
+                                                     SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "Custom lowering: ");
   LLVM_DEBUG(Op.dump());
 
@@ -3174,6 +3246,208 @@
   }
 }
 
+// Return the largest legal scalable vector type that matches VT's element type.
+static EVT getContainerForFixedVector(SelectionDAG &DAG, EVT VT) {
+  assert(VT.isFixedVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+         "Expected legal fixed length vector!");
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unimplemented operand");
+  case MVT::i8:  return EVT(MVT::nxv16i8);
+  case MVT::i16: return EVT(MVT::nxv8i16);
+  case MVT::i32: return EVT(MVT::nxv4i32);
+  case MVT::i64: return EVT(MVT::nxv2i64);
+  case MVT::f16: return EVT(MVT::nxv8f16);
+  case MVT::f32: return EVT(MVT::nxv4f32);
+  case MVT::f64: return EVT(MVT::nxv2f64);
+  }
+}
+
+// Return a PTRUE with active lanes corresponding to the extent of VT.
+static SDValue CreatePredicateForFixedVector(SelectionDAG &DAG, SDLoc &DL,
+                                             EVT VT) {
+  assert(VT.isFixedVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+         "Expected legal fixed length vector!");
+
+  int PgPattern;
+  switch (VT.getVectorNumElements()) {
+  default: llvm_unreachable("unimplemented operand");
+  case 32: PgPattern = AArch64SVEPredPattern::vl32; break;
+  case 16: PgPattern = AArch64SVEPredPattern::vl16; break;
+  case 8:  PgPattern = AArch64SVEPredPattern::vl8; break;
+  case 4:  PgPattern = AArch64SVEPredPattern::vl4; break;
+  case 2:  PgPattern = AArch64SVEPredPattern::vl2; break;
+  case 1:  PgPattern = AArch64SVEPredPattern::vl1; break;
+  }
+
+#if 0
+  // This allows the use of unpredicated instructions when available.
+  if (VT.getSizeInBits() == Subtarget->getMaxSVEVectorSizeInBits())
+    PgPattern = AArch64SVEPredPattern::all;
+#endif
+
+  MVT MaskVT;
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unimplemented operand");
+  case MVT::i8:  MaskVT = MVT::nxv16i1; break;
+  case MVT::i16:
+  case MVT::f16: MaskVT = MVT::nxv8i1; break;
+  case MVT::i32:
+  case MVT::f32: MaskVT = MVT::nxv4i1; break;
+  case MVT::i64:
+  case MVT::f64: MaskVT = MVT::nxv2i1; break;
+  }
+
+  return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
+                     DAG.getTargetConstant(PgPattern, DL, MVT::i64));
+}
+
+// Grow V to consume an entire SVE register.
+static SDValue ConvertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
+  assert(VT.isScalableVector() &&
+         "Expected to convert into a scalable vector!");
+  assert(V.getValueType().isVector() && !V.getValueType().isScalableVector() &&
+         "Expected a fixed length vector operand!");
+  SDLoc DL(V);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
+}
+
+// Shrink V so it's just big enough to maintain a VT's worth of data.
+static SDValue ConvertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
+  assert(VT.isFixedVector() &&
+         "Expected to convert into a fixed length vector!");
+  assert(V.getValueType().isScalableVector() &&
+         "Expected a scalable vector operand!");
+  SDLoc DL(V);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
+}
+
+static SDValue LowerFixedVectorLoadToSVE(SelectionDAG &DAG, SDValue Op) {
+  auto Load = cast<LoadSDNode>(Op);
+
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  EVT ContainerVT = getContainerForFixedVector(DAG, VT);
+
+  auto NewLoad = DAG.getMaskedLoad(ContainerVT, DL,
+                                   Load->getChain(),
+                                   Load->getBasePtr(),
+                                   Load->getOffset(),
+                                   CreatePredicateForFixedVector(DAG, DL, VT),
+                                   DAG.getUNDEF(ContainerVT),
+                                   Load->getMemoryVT(),
+                                   Load->getMemOperand(),
+                                   Load->getAddressingMode(),
+                                   Load->getExtensionType());
+
+  auto Result = ConvertFromScalableVector(DAG, VT, NewLoad);
+  SDValue MergedValues[2] = { Result, Load->getChain() };
+  return DAG.getMergeValues(MergedValues, DL);
+}
+
+static SDValue LowerFixedVectorStoreToSVE(SelectionDAG &DAG, SDValue Op) {
+  auto Store = cast<StoreSDNode>(Op);
+
+  SDLoc DL(Op);
+  EVT VT = Store->getValue().getValueType();
+  EVT ContainerVT = getContainerForFixedVector(DAG, VT);
+
+  auto NewValue = ConvertToScalableVector(DAG, ContainerVT, Store->getValue());
+  return DAG.getMaskedStore(Store->getChain(), DL,
+                            NewValue,
+                            Store->getBasePtr(),
+                            Store->getOffset(),
+                            CreatePredicateForFixedVector(DAG, DL, VT),
+                            Store->getMemoryVT(),
+                            Store->getMemOperand(),
+                            Store->getAddressingMode(),
+                            Store->isTruncatingStore());
+}
+
+SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  if (!useSVEForFixedLengthVectors())
+    return LowerOperationDefault(Op, DAG);
+
+  SDLoc DL(Op);
+
+  // If we get here then we have a vector operation whose operands are legal
+  // but too big to be implemented using Neon. Lower the operation to SVE.
+
+  LLVM_DEBUG(dbgs() << "SVE lowering: ");
+  LLVM_DEBUG(Op.dump());
+
+  switch (Op.getOpcode()) {
+  default:
+    return LowerOperationDefault(Op, DAG);
+
+  // EXTRACT_SUBVECTOR/INSERT_SUBVECTOR are used to "cast" between scalable
+  // and fixed length vector types. These represent legal operations with
+  // everything else requiring custom handling.
+  case ISD::EXTRACT_SUBVECTOR: {
+    EVT VT = Op.getValueType();
+    EVT InVT = Op.getOperand(0).getValueType();
+
+    if (VT.isFixedVector() && InVT.isFixedVector()) {
+      if (!UseSVEForAll && InVT.getSizeInBits() <= 128)
+        return Op; // Use Neon.
+
+      return SDValue(); // Expand fixed length extract...
+    }
+    return Op; // ...with everything else being legal.
+  }
+  case ISD::INSERT_SUBVECTOR: {
+    EVT VT = Op.getValueType();
+    EVT InVT = Op.getOperand(1).getValueType();
+
+    if (VT.isFixedVector() && InVT.isFixedVector()) {
+      if (!UseSVEForAll && VT.getSizeInBits() <= 128)
+        return Op; // Use Neon.
+
+      return SDValue(); // Expand fixed length inserts...
+    }
+    return Op; // ...with everything else being legal.
+  }
+
+  // Convert all vector loads to masked_loads.
+  case ISD::LOAD: {
+    EVT VT = Op.getValueType();
+    if (!VT.isFixedVector())
+      return LowerOperationDefault(Op, DAG);
+
+    if (!UseSVEForAll && VT.getSizeInBits() <= 128)
+      return Op; // Use Neon.
+    return LowerFixedVectorLoadToSVE(DAG, Op);
+  }
+
+  // Convert all vector stores to masked_stores.
+  case ISD::STORE: {
+    EVT InVT = cast<StoreSDNode>(Op)->getValue().getValueType();
+    if (!InVT.isFixedVector())
+      return LowerOperationDefault(Op, DAG);
+
+    if (!UseSVEForAll && InVT.getSizeInBits() <= 128)
+      return Op; // Use Neon.
+    return LowerFixedVectorStoreToSVE(DAG, Op);
+  }
+  }
+}
+
+// Should we expand the build vector with shuffles?
+bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+    unsigned DefinedValues) const {
+  if (useSVEForFixedLengthVectors())
+    return false;
+
+  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
+  // Prefer Neon unless larger SVE registers are available.
+  return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+}
+
 //===----------------------------------------------------------------------===//
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -493,6 +493,9 @@
   }
 
   void mirFileLoaded(MachineFunction &MF) const override;
+
+  unsigned getMaxSVEVectorSizeInBits() const;
+  unsigned getMinSVEVectorSizeInBits() const;
 };
 
 } // End llvm namespace
Index: llvm/lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,6 +47,18 @@
     cl::desc("Call nonlazybind functions via direct GOT load"),
     cl::init(false), cl::Hidden);
 
+static cl::opt<unsigned>
+SVEVectorBitsMax("aarch64-sve-vector-bits-max",
+                 cl::desc("Assume SVE vector registers are at most this big, "
+                          "with zero meaning no maximum size is assumed."),
+                 cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned>
+SVEVectorBitsMin("aarch64-sve-vector-bits-min",
+                 cl::desc("Assume SVE vector registers are at least this big, "
+                          "with zero meaning no minimum size is assumed."),
+                 cl::init(0), cl::Hidden);
+
 AArch64Subtarget &
 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                   StringRef CPUString) {
@@ -297,3 +309,17 @@
   if (!MFI.isMaxCallFrameSizeComputed())
     MFI.computeMaxCallFrameSize(MF);
 }
+
+unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
+  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
+  assert(((SVEVectorBitsMax >= SVEVectorBitsMin) || (SVEVectorBitsMax == 0)) &&
+         "Minimum SVE vector size cannot be larger than its maximum!");
+  return SVEVectorBitsMax;
+}
+
+unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
+  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
+  assert(((SVEVectorBitsMax >= SVEVectorBitsMin) || (SVEVectorBitsMax == 0)) &&
+         "Minimum SVE vector size cannot be larger than its maximum!");
+  return SVEVectorBitsMin;
+}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -98,6 +98,8 @@
 
   unsigned getRegisterBitWidth(bool Vector) const {
     if (Vector) {
+      if (ST->hasSVE())
+        return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
      if (ST->hasNEON())
        return 128;
      return 0;
@@ -153,6 +155,9 @@
     if (!isa<VectorType>(DataType) || !ST->hasSVE())
       return false;
 
+    if (ST->getMinSVEVectorSizeInBits() != 0)
+      return false; // HACK
+
     Type *Ty = DataType->getVectorElementType();
     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
       return true;