diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3240,6 +3240,53 @@
   ReplaceNode(N, N3);
 }
 
+// NOTE: When targeting fixed length vectors with SVE the range of legal MVTs
+// varies with the configured vector length, hence this manual selection.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+  SDLoc DL(V);
+  switch (VT.getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
+// NOTE: When targeting fixed length vectors with SVE the range of legal MVTs
+// varies with the configured vector length, hence this manual selection.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(V.getValueType().isFixedLengthVector() &&
+         "Expected fixed length vector type!");
+  SDLoc DL(V);
+  switch (V.getValueType().getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3313,6 +3360,44 @@
       return;
     break;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    // Bail when not a "cast" like extract_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+      break;
+
+    // Bail when normal isel can do the job.
+    EVT InVT = Node->getOperand(0).getValueType();
+    if (VT.isScalableVector() || InVT.isFixedLengthVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+
+    ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+    return;
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    // Bail when not a "cast" like insert_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+      break;
+    if (!Node->getOperand(0).isUndef())
+      break;
+
+    // Bail when normal isel should do the job.
+    EVT InVT = Node->getOperand(1).getValueType();
+    if (VT.isFixedLengthVector() || InVT.isScalableVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+
+    ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+    return;
+  }
+
   case ISD::Constant: {
     // Materialize zero constants as copies from WZR/XZR. This allows
     // the coalescer to propagate these into other instructions.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8666,29 +8666,24 @@
 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                        SelectionDAG &DAG) const {
-  assert(!Op.getValueType().isScalableVector() &&
-         "Unexpected scalable type for custom lowering EXTRACT_SUBVECTOR");
-
-  EVT VT = Op.getOperand(0).getValueType();
-  SDLoc dl(Op);
-  // Just in case...
-  if (!VT.isVector())
-    return SDValue();
-
-  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-  if (!Cst)
-    return SDValue();
-  unsigned Val = Cst->getZExtValue();
+  assert(Op.getValueType().isFixedLengthVector() &&
+         "Only cases that extract a fixed length vector are supported!");
+  EVT InVT = Op.getOperand(0).getValueType();
+  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   unsigned Size = Op.getValueSizeInBits();
 
+  // Expand cases where EXTRACT_SUBREG cannot be used.
+  if (InVT.isScalableVector())
+    return Idx == 0 ? Op : SDValue();
+
   // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
-  if (Val == 0)
+  if (Idx == 0 && InVT.getSizeInBits() <= 128)
     return Op;
 
   // If this is extracting the upper 64-bits of a 128-bit vector, we match
   // that directly.
-  if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
+  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
     return Op;
 
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -0,0 +1,88 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+; Test we can code generate patterns of the form:
+;   fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
+;   scalable_vector = ISD::INSERT_SUBVECTOR scalable_vector, fixed_length_vector, 0
+;
+; NOTE: Currently shufflevector does not support scalable vectors so it cannot
+; be used to model the above operations. Instead these tests rely on knowing
+; how fixed length operations are lowered to scalable ones, with multiple blocks
+; ensuring insert/extract sequences are not folded away.
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v8i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; CHECK: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <8 x i32>, <8 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <8 x i32> %a, <8 x i32>* %out
+  ret void
+}
+
+define void @subvector_v16i32(<16 x i32> *%in, <16 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v16i32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_512: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <16 x i32>, <16 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <16 x i32> %a, <16 x i32>* %out
+  ret void
+}
+
+define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v32i32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_1024: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <32 x i32>, <32 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <32 x i32> %a, <32 x i32>* %out
+  ret void
+}
+
+define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v64i32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_2048: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <64 x i32>, <64 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <64 x i32> %a, <64 x i32>* %out
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
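
For context (not part of the patch above): a minimal sketch of why every test routes the loaded value through a second basic block. In a single-block variant such as the hypothetical function below, the insert_subvector/extract_subvector "casts" introduced when fixed length operations are lowered to scalable ones would typically be folded away before instruction selection, so the new EXTRACT_SUBVECTOR/INSERT_SUBVECTOR paths in AArch64DAGToDAGISel::Select would not be exercised. The function name is hypothetical and the snippet is illustrative only.

; Hypothetical single-block variant: producing and consuming %a in one block
; lets the subvector casts fold away, which the tests above avoid by
; branching to a separate block before the store.
define void @subvector_v8i32_single_block(<8 x i32>* %in, <8 x i32>* %out) #0 {
  %a = load <8 x i32>, <8 x i32>* %in
  store <8 x i32> %a, <8 x i32>* %out
  ret void
}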