diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -859,59 +859,45 @@
     MVT InVT = Node->getOperand(0).getSimpleValueType();
     SDLoc DL(V);
 
-    // TODO: This method of selecting EXTRACT_SUBVECTOR should work
-    // with any type of extraction (fixed <-> scalable) but we don't yet
-    // correctly identify the canonical register class for fixed-length types.
-    // For now, keep the two paths separate.
-    if (VT.isScalableVector() && InVT.isScalableVector()) {
-      const auto *TRI = Subtarget->getRegisterInfo();
-      unsigned SubRegIdx;
-      std::tie(SubRegIdx, Idx) =
-          RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
-              InVT, VT, Idx, TRI);
-
-      // If the Idx hasn't been completely eliminated then this is a subvector
-      // extract which doesn't naturally align to a vector register. These must
-      // be handled using instructions to manipulate the vector registers.
-      if (Idx != 0)
-        break;
-
-      // If we haven't set a SubRegIdx, then we must be going between LMUL<=1
-      // types (VR -> VR). This can be done as a copy.
-      if (SubRegIdx == RISCV::NoSubRegister) {
-        unsigned InRegClassID =
-            RISCVTargetLowering::getRegClassIDForVecVT(InVT);
-        assert(RISCVTargetLowering::getRegClassIDForVecVT(VT) ==
-                   RISCV::VRRegClassID &&
-               InRegClassID == RISCV::VRRegClassID &&
-               "Unexpected subvector extraction");
-        SDValue RC =
-            CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
-        SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
-                                                 DL, VT, V, RC);
-        return ReplaceNode(Node, NewNode);
-      }
-      SDNode *NewNode = CurDAG->getMachineNode(
-          TargetOpcode::EXTRACT_SUBREG, DL, VT, V,
-          CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
-      return ReplaceNode(Node, NewNode);
-    }
-
-    if (VT.isFixedLengthVector() && InVT.isScalableVector()) {
-      // Bail when not a "cast" like extract_subvector.
-      if (Idx != 0)
-        break;
+    MVT SubVecContainerVT = VT;
+    // Establish the correct scalable-vector types for any fixed-length type.
+    if (VT.isFixedLengthVector())
+      SubVecContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+          *CurDAG, VT, *Subtarget);
+    if (InVT.isFixedLengthVector())
+      InVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+          *CurDAG, InVT, *Subtarget);
+
+    const auto *TRI = Subtarget->getRegisterInfo();
+    unsigned SubRegIdx;
+    std::tie(SubRegIdx, Idx) =
+        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+            InVT, SubVecContainerVT, Idx, TRI);
+
+    // If the Idx hasn't been completely eliminated then this is a subvector
+    // extract which doesn't naturally align to a vector register. These must
+    // be handled using instructions to manipulate the vector registers.
+    if (Idx != 0)
+      break;
+
+    // If we haven't set a SubRegIdx, then we must be going between
+    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
+    if (SubRegIdx == RISCV::NoSubRegister) {
       unsigned InRegClassID =
           RISCVTargetLowering::getRegClassIDForVecVT(InVT);
-
+      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
+                 InRegClassID &&
+             "Unexpected subvector extraction");
       SDValue RC =
           CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                                DL, VT, V, RC);
-      ReplaceNode(Node, NewNode);
-      return;
+      return ReplaceNode(Node, NewNode);
     }
-    break;
+
+    SDNode *NewNode = CurDAG->getMachineNode(
+        TargetOpcode::EXTRACT_SUBREG, DL, VT, V,
+        CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
+    return ReplaceNode(Node, NewNode);
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -383,6 +383,8 @@
   decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                            unsigned InsertExtractIdx,
                                            const RISCVRegisterInfo *TRI);
+  static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
+                                              const RISCVSubtarget &Subtarget);
 
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -526,7 +526,7 @@
         setOperationAction(Op, VT, Expand);
 
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
-      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -582,7 +582,7 @@
         setOperationAction(Op, VT, Expand);
 
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
-      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -907,8 +907,8 @@
 }
 
 // Return the largest legal scalable vector type that matches VT's element type.
-static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
-                                            const RISCVSubtarget &Subtarget) {
+MVT RISCVTargetLowering::getContainerForFixedLengthVector(
+    SelectionDAG &DAG, MVT VT, const RISCVSubtarget &Subtarget) {
   assert(VT.isFixedLengthVector() &&
          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
          "Expected legal fixed length vector!");
@@ -992,7 +992,8 @@
   MVT VT = Op.getSimpleValueType();
   assert(VT.isFixedLengthVector() && "Unexpected vector!");
 
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   SDLoc DL(Op);
   SDValue Mask, VL;
@@ -1047,7 +1048,8 @@
   if (SVN->isSplat()) {
     int Lane = SVN->getSplatIndex();
     if (Lane >= 0) {
-      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+      MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+          DAG, VT, Subtarget);
 
       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
       assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");
@@ -1891,7 +1893,8 @@
     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
   }
 
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+  MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+      DAG, VecVT, Subtarget);
   MVT I1ContainerVT =
       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
@@ -2251,10 +2254,15 @@
   MVT SubVecVT = Op.getSimpleValueType();
   MVT VecVT = Vec.getSimpleValueType();
 
-  // TODO: Only handle scalable->scalable extracts for now, and revisit this
-  // for fixed-length vectors later.
-  if (!SubVecVT.isScalableVector() || !VecVT.isScalableVector())
-    return Op;
+  bool IsSubVecFixedLen = SubVecVT.isFixedLengthVector();
+  if (IsSubVecFixedLen)
+    SubVecVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+        DAG, SubVecVT, Subtarget);
+  if (VecVT.isFixedLengthVector()) {
+    VecVT = RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VecVT,
+                                                                  Subtarget);
+    Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
+  }
 
   SDLoc DL(Op);
   unsigned OrigIdx = Op.getConstantOperandVal(1);
@@ -2287,7 +2295,8 @@
   // to place the desired subvector starting at element 0.
   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
   // For scalable vectors this must be further multiplied by vscale.
-  SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
+  if (!IsSubVecFixedLen)
+    SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
 
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
@@ -2297,8 +2306,12 @@
 
   // Now the vector is in the right position, extract our final subvector. This
   // should resolve to a COPY.
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
-                     DAG.getConstant(0, DL, XLenVT));
+  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
+                                DAG.getConstant(0, DL, XLenVT));
+  if (IsSubVecFixedLen)
+    Extract = convertFromScalableVector(Op.getSimpleValueType(), Extract, DAG,
+                                        Subtarget);
+  return Extract;
 }
 
 SDValue
@@ -2308,7 +2321,8 @@
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
 
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   SDValue VL =
       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
@@ -2333,7 +2347,8 @@
 
   // FIXME: We probably need to zero any extra bits in a byte for mask stores.
   // This is tricky to do.
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   SDValue VL =
       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
@@ -2350,7 +2365,8 @@
 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                       SelectionDAG &DAG) const {
   MVT InVT = Op.getOperand(0).getSimpleValueType();
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT, Subtarget);
+  MVT ContainerVT = RISCVTargetLowering::getContainerForFixedLengthVector(
+      DAG, InVT, Subtarget);
 
   MVT VT = Op.getSimpleValueType();
@@ -2463,7 +2479,8 @@
 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
     SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   MVT I1ContainerVT =
       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
@@ -2491,7 +2508,8 @@
   MVT VT = Op.getSimpleValueType();
   assert(useRVVForFixedLengthVectorVT(VT) &&
          "Only expected to lower fixed length vector operation!");
-  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
 
   // Create list of operands by converting existing ones to scalable types.
   SmallVector<SDValue, 6> Ops;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @extract_v2i8_v8i8_0(<8 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: extract_v2i8_v8i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT:    vse8.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 0)
+  store <2 x i8> %c, <2 x i8>* %y
+  ret void
+}
+
+define void @extract_v2i8_v8i8_6(<8 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: extract_v2i8_v8i8_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v25, 6
+; CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT:    vse8.v v25, (a1)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 6)
+  store <2 x i8> %c, <2 x i8>* %y
+  ret void
+}
+
+define void @extract_v2i32_v8i32_0(<8 x i32>* %x, <2 x i32>* %y) {
+; LMULMAX2-LABEL: extract_v2i32_v8i32_0:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vle32.v v26, (a0)
+; LMULMAX2-NEXT:    addi a0, zero, 2
+; LMULMAX2-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vse32.v v26, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i32_v8i32_0:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v25, (a0)
+; LMULMAX1-NEXT:    addi a0, zero, 2
+; LMULMAX1-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <8 x i32>, <8 x i32>* %x
+  %c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 0)
+  store <2 x i32> %c, <2 x i32>* %y
+  ret void
+}
+
+define void @extract_v2i32_v8i32_2(<8 x i32>* %x, <2 x i32>* %y) {
+; LMULMAX2-LABEL: extract_v2i32_v8i32_2:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vle32.v v26, (a0)
+; LMULMAX2-NEXT:    addi a0, zero, 2
+; LMULMAX2-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vse32.v v27, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i32_v8i32_2:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v25, (a0)
+; LMULMAX1-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT:    addi a0, zero, 2
+; LMULMAX1-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <8 x i32>, <8 x i32>* %x
+  %c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2)
+  store <2 x i32> %c, <2 x i32>* %y
+  ret void
+}
+
+define void @extract_v2i32_v8i32_6(<8 x i32>* %x, <2 x i32>* %y) {
+; LMULMAX2-LABEL: extract_v2i32_v8i32_6:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vle32.v v26, (a0)
+; LMULMAX2-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v25, v27, 4
+; LMULMAX2-NEXT:    addi a0, zero, 2
+; LMULMAX2-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX2-NEXT:    vse32.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i32_v8i32_6:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    addi a2, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vle32.v v25, (a0)
+; LMULMAX1-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT:    addi a0, zero, 2
+; LMULMAX1-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <8 x i32>, <8 x i32>* %x
+  %c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6)
+  store <2 x i32> %c, <2 x i32>* %y
+  ret void
+}
+
+declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
+declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)