diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -110,6 +110,19 @@
   BDECOMPRESSW,
   // Packed SIMD Extension
   BITREV,
+  INSERTB,
+  INSERTH,
+  INSERTW,
+  SWAP8,
+  SWAP16,
+  PKBB16,
+  PKBT16,
+  PKTT16,
+  PKTB16,
+  PKBB32,
+  PKBT32,
+  PKTT32,
+  PKTB32,
   // Vector Extension
   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
   // for the VL value to be used for the operation.
@@ -533,6 +546,8 @@
   SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                              int64_t ExtTrueVal) const;
   SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerRVV_INSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerRVP_INSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -810,6 +810,8 @@
   }

   if (Subtarget.hasStdExtP()) {
+    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
     const auto addTypeForP = [&](MVT VT, MVT PromotedBitwiseVT) {
       // Expand all builtin opcodes.
       for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
@@ -822,6 +824,10 @@
       setOperationAction(ISD::XOR, VT, Legal);
       setOperationAction(ISD::BITCAST, VT, Legal);

+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+
       // Promote load and store operations.
       setOperationAction(ISD::LOAD, VT, Promote);
       AddPromotedToType(ISD::LOAD, VT, PromotedBitwiseVT);
@@ -1403,8 +1409,8 @@
   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
 }

-static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
-                                 const RISCVSubtarget &Subtarget) {
+static SDValue lowerRVV_BUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                     const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   assert(VT.isFixedLengthVector() && "Unexpected vector!");

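Editorial aside, not part of the patch: under Zpn the fixed-length vector types handled here are XLEN-sized and live in GPRs, which is why loads, stores and the bitwise operations above stay on (or are promoted to) the scalar XLenVT, and why the custom lowerings added below all work by bit manipulation on a single register. A minimal standalone sketch of the element layout this relies on, assuming a little-endian host so that it mirrors the RISC-V in-register byte order:

// Standalone illustration (not from the patch): on a little-endian host,
// mirroring the RISC-V layout, element 0 of a v4i8 occupies the least
// significant byte of the i32 it is bitcast to.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Elts[4] = {0x01, 0x02, 0x30, 0x40}; // <4 x i8> <1, 2, 48, 64>
  uint32_t AsScalar;
  std::memcpy(&AsScalar, Elts, sizeof(AsScalar));
  assert(AsScalar == 0x40300201u); // element i sits at bits [8*i, 8*i+8)
  return 0;
}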
@@ -1673,6 +1679,88 @@
   return SDValue();
 }

+static bool
+getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy, SelectionDAG &DAG,
+                        MutableArrayRef<const ConstantInt *> Consts) {
+  MVT EltTy = VecTy.getVectorElementType();
+  unsigned EltWidth = EltTy.getSizeInBits();
+  IntegerType *IntTy = IntegerType::get(*DAG.getContext(), EltWidth);
+  bool AllConst = true;
+
+  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    SDValue V = Values[i];
+    if (V.isUndef()) {
+      Consts[i] = ConstantInt::get(IntTy, 0);
+      continue;
+    }
+
+    // Make sure to always cast to IntTy.
+    if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode()))
+      Consts[i] = CN->getConstantIntValue();
+    else
+      AllConst = false;
+  }
+
+  return AllConst;
+}
+
+static SDValue lowerRVP_BUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                     const RISCVSubtarget &Subtarget) {
+  MVT VecTy = Op.getSimpleValueType();
+  assert((VecTy == MVT::v4i8 || VecTy == MVT::v2i16 || VecTy == MVT::v8i8 ||
+          VecTy == MVT::v4i16 || VecTy == MVT::v2i32) &&
+         "Unexpected VTs!");
+
+  SDLoc DL(Op);
+  MVT EltTy = VecTy.getVectorElementType();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  if (ISD::isBuildVectorAllOnes(Op.getNode()))
+    return DAG.getBitcast(VecTy, DAG.getAllOnesConstant(DL, XLenVT));
+
+  SmallVector<SDValue, 8> Elems;
+  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
+    Elems.push_back(Op.getOperand(i));
+
+  unsigned Num = Op.getNumOperands();
+  SmallVector<const ConstantInt *, 8> Consts(Num);
+  bool AllConst = getBuildVectorConstInts(Elems, VecTy, DAG, Consts);
+
+  // If all elements are constant, generate an XLenVT constant.
+  if (AllConst) {
+    uint64_t V = 0;
+    unsigned W = EltTy.getSizeInBits();
+    uint64_t Mask = (EltTy == MVT::i8)
+                        ? 0xff
+                        : (EltTy == MVT::i16) ? 0xffffull : 0xffffffffull;
+    for (unsigned I = 0; I != Num; ++I)
+      V = (V << W) | (Consts[Num - 1 - I]->getZExtValue() & Mask);
+    return DAG.getBitcast(VecTy, DAG.getConstant(V, DL, XLenVT));
+  }
+
+  // Otherwise, insert the elements into a vector one by one.
+  SDValue Vec = DAG.getUNDEF(VecTy);
+  for (unsigned I = 0; I < Num; ++I)
+    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecTy, Vec, Elems[I],
+                      DAG.getConstant(I, DL, XLenVT));
+  return Vec;
+}
+
+static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                 const RISCVSubtarget &Subtarget) {
+  if (Subtarget.hasStdExtV()) {
+    if (SDValue V = lowerRVV_BUILD_VECTOR(Op, DAG, Subtarget))
+      return V;
+  }
+
+  if (Subtarget.hasStdExtP()) {
+    if (SDValue V = lowerRVP_BUILD_VECTOR(Op, DAG, Subtarget))
+      return V;
+  }
+
+  return SDValue();
+}
+
 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
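The all-constant path of lowerRVP_BUILD_VECTOR above folds the whole build_vector into one XLenVT immediate by shifting in the element constants from the most significant element down, masked to the element width. A minimal standalone sketch of that packing, assuming plain uint64_t arithmetic in place of the DAG constants (the helper name packConstants is made up for the illustration):

// Standalone sketch (not from the patch): pack build_vector constants into a
// single XLen immediate, least significant element first, mirroring the loop
// over Consts[Num - 1 - I] above.
#include <cassert>
#include <cstdint>

static uint64_t packConstants(const uint64_t *Elts, unsigned Num,
                              unsigned EltBits, uint64_t EltMask) {
  uint64_t V = 0;
  for (unsigned I = 0; I != Num; ++I)
    V = (V << EltBits) | (Elts[Num - 1 - I] & EltMask);
  return V;
}

int main() {
  const uint64_t V4I8[4] = {1, 2, 0, 4}; // <4 x i8> <1, 2, 0, 4>
  assert(packConstants(V4I8, 4, 8, 0xff) == 0x04000201u); // element 0 is low
  return 0;
}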
@@ -1731,8 +1819,8 @@
   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
 }

-static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
-                                   const RISCVSubtarget &Subtarget) {
+static SDValue lowerRVV_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+                                       const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   SDLoc DL(Op);
@@ -1924,6 +2012,207 @@
   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
 }

+static SDValue lowerRVP_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+                                       const RISCVSubtarget &Subtarget) {
+  MVT VecTy = Op.getSimpleValueType();
+  assert((VecTy == MVT::v4i8 || VecTy == MVT::v2i16 || VecTy == MVT::v8i8 ||
+          VecTy == MVT::v4i16 || VecTy == MVT::v2i32) &&
+         "Unexpected VTs!");
+
+  SDLoc DL(Op);
+  MVT EltTy = VecTy.getVectorElementType();
+  unsigned NumElts = VecTy.getVectorNumElements();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+  ArrayRef<int> Mask = SVN->getMask();
+
+  // Normalize the mask so that the first non-negative index comes from
+  // the first operand.
+  SmallVector<int, 8> NormalizedMask(Mask.begin(), Mask.end());
+  unsigned F = llvm::find_if(Mask, [](int M) { return M >= 0; }) - Mask.data();
+  // All indexes are negative.
+  if (F == Mask.size())
+    return DAG.getUNDEF(VecTy);
+  // Swap the operands if the first non-negative index refers to the second
+  // operand.
+  if (Mask[F] >= int(NumElts)) {
+    ShuffleVectorSDNode::commuteMask(NormalizedMask);
+    std::swap(V1, V2);
+  }
+
+  // Express the shuffle mask in terms of bytes.
+  SmallVector<int, 8> ByteMask;
+  unsigned ElemBytes = EltTy.getSizeInBits() / 8;
+  for (unsigned i = 0, e = NormalizedMask.size(); i != e; ++i) {
+    int M = NormalizedMask[i];
+    if (M < 0) {
+      for (unsigned j = 0; j != ElemBytes; ++j)
+        ByteMask.push_back(-1);
+    } else {
+      for (unsigned j = 0; j != ElemBytes; ++j)
+        ByteMask.push_back(M * ElemBytes + j);
+    }
+  }
+
+  assert(ByteMask.size() <= 8);
+
+  // All non-undef (non-negative) indexes are well within [0..127], so they
+  // fit in a single byte. Build two 64-bit words:
+  // - MaskIdx where each byte is the corresponding index (for non-negative
+  //   indexes), and 0xff for negative indexes, and
+  // - MaskUnd that has 0xff for each negative index.
+  uint64_t MaskIdx = 0;
+  uint64_t MaskUnd = 0;
+  for (unsigned I = 0; I < ByteMask.size(); ++I) {
+    unsigned S = 8 * I;
+    uint64_t M = ByteMask[I] & 0xff;
+    if (M == 0xff)
+      MaskUnd |= M << S;
+    MaskIdx |= M << S;
+  }
+
+  // Bitcast the vector operands to XLenVT.
+  V1 = DAG.getBitcast(XLenVT, V1);
+  V2 = DAG.getBitcast(XLenVT, V2);
+
+  // The byte indexes of the operands look like:
+  //        V2                  V1
+  //     (7 6 5 4)           (3 2 1 0)        -> 4 bytes for RV32
+  //  (f e d c b a 9 8)  (7 6 5 4 3 2 1 0)    -> 8 bytes for RV64
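To make the pattern checks that follow concrete, here is a worked example (standalone, not from the patch) of how one shuffle mask turns into ByteMask and MaskIdx. For a v2i16 shuffle with element mask <1, 0>, the byte mask is {2, 3, 0, 1} and MaskIdx comes out as 0x01000302, which is exactly the "swap halfwords within each word" case handled below with SWAP16:

// Standalone sketch (not from the patch): encode the byte mask for the
// v2i16 shuffle mask <1, 0> the same way the loop above builds MaskIdx,
// and check it against the SWAP16 pattern constant.
#include <cassert>
#include <cstdint>

int main() {
  const int ElemMask[2] = {1, 0}; // shufflevector <2 x i16> mask
  const unsigned ElemBytes = 2;

  int ByteMask[4];
  for (unsigned I = 0; I != 2; ++I)
    for (unsigned J = 0; J != ElemBytes; ++J)
      ByteMask[I * ElemBytes + J] = ElemMask[I] * ElemBytes + J;

  uint64_t MaskIdx = 0;
  for (unsigned I = 0; I != 4; ++I)
    MaskIdx |= uint64_t(ByteMask[I] & 0xff) << (8 * I);

  assert(MaskIdx == 0x01000302); // the "swap halfwords" SWAP16 case below
  return 0;
}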
+  // Swap bytes
+  // The vector is built from byte indexes (0 1 2 3) or (4 5 6 7 0 1 2 3).
+  if (MaskIdx == (0x00010203 | MaskUnd) ||
+      MaskIdx == (0x0405060700010203ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::SWAP8, DL, XLenVT, V1);
+    V = DAG.getNode(RISCVISD::SWAP16, DL, XLenVT, V);
+    return DAG.getBitcast(VecTy, V);
+  }
+  // The vector is built from byte indexes (0 1 2 3 4 5 6 7).
+  if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
+    SDValue V = DAG.getNode(ISD::BSWAP, DL, XLenVT, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Swap bytes within each halfword
+  // The vector is built from byte indexes (2 3 0 1) or (6 7 4 5 2 3 0 1).
+  if (MaskIdx == (0x02030001 | MaskUnd) ||
+      MaskIdx == (0x0607040502030001ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::SWAP8, DL, XLenVT, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Swap halfwords within each word
+  // The vector is built from byte indexes (1 0 3 2) or (5 4 7 6 1 0 3 2).
+  if (MaskIdx == (0x01000302 | MaskUnd) ||
+      MaskIdx == (0x0504070601000302ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::SWAP16, DL, XLenVT, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKBB16
+  // The vector is built from byte indexes 5 4 1 0 or d c 5 4 9 8 1 0.
+  if (MaskIdx == (0x05040100 | MaskUnd) ||
+      MaskIdx == (0x0d0c050409080100ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKBB16, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKBT16
+  // The vector is built from byte indexes 5 4 3 2 or d c 7 6 9 8 3 2.
+  if (MaskIdx == (0x05040302 | MaskUnd) ||
+      MaskIdx == (0x0d0c070609080302ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKBT16, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKTT16
+  // The vector is built from byte indexes 7 6 3 2 or f e 7 6 b a 3 2.
+  if (MaskIdx == (0x07060302 | MaskUnd) ||
+      MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKTT16, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKTB16
+  // The vector is built from byte indexes 7 6 1 0 or f e 5 4 b a 1 0.
+  if (MaskIdx == (0x07060100 | MaskUnd) ||
+      MaskIdx == (0x0f0e05040b0a0100ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKTB16, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKBB32
+  // The vector is built from byte indexes b a 9 8 3 2 1 0.
+  if (MaskIdx == (0x0b0a090803020100ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKBB32, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKBT32
+  // The vector is built from byte indexes b a 9 8 7 6 5 4.
+  if (MaskIdx == (0x0b0a090807060504ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKBT32, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKTT32
+  // The vector is built from byte indexes f e d c 7 6 5 4.
+  if (MaskIdx == (0x0f0e0d0c07060504ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKTT32, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Pack vectors with PKTB32
+  // The vector is built from byte indexes f e d c 3 2 1 0.
+  if (MaskIdx == (0x0f0e0d0c03020100ull | MaskUnd)) {
+    SDValue V = DAG.getNode(RISCVISD::PKTB32, DL, XLenVT, V2, V1);
+    return DAG.getBitcast(VecTy, V);
+  }
+
+  // Detect shuffles that can be re-expressed as vector selects; these are
+  // shuffles in which each element of the destination is taken from the
+  // element at the corresponding index of one of the two source vectors.
+  bool IsSelect = all_of(enumerate(NormalizedMask), [&](const auto &Idx) {
+    int MaskIndex = Idx.value();
+    return MaskIndex < 0 || Idx.index() == (unsigned)MaskIndex % NumElts;
+  });
+
+  if (IsSelect) {
+    // Construct the select mask.
+    SmallVector<SDValue, 8> MaskVals;
+    for (int MaskIndex : NormalizedMask) {
+      if (MaskIndex < (int)NumElts)
+        MaskVals.push_back(DAG.getAllOnesConstant(DL, XLenVT));
+      else
+        MaskVals.push_back(DAG.getConstant(0, DL, XLenVT));
+    }
+
+    SDValue SelectMask = DAG.getBuildVector(VecTy, DL, MaskVals);
+    return DAG.getNode(ISD::VSELECT, DL, VecTy, SelectMask,
+                       DAG.getBitcast(VecTy, V1), DAG.getBitcast(VecTy, V2));
+  }
+
+  // Otherwise fall back to the generic expansion via BUILD_VECTOR.
+  return SDValue();
+}
+
+static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+                                   const RISCVSubtarget &Subtarget) {
+  if (Subtarget.hasStdExtV()) {
+    if (SDValue V = lowerRVV_VECTOR_SHUFFLE(Op, DAG, Subtarget))
+      return V;
+  }
+
+  if (Subtarget.hasStdExtP()) {
+    if (SDValue V = lowerRVP_VECTOR_SHUFFLE(Op, DAG, Subtarget))
+      return V;
+  }
+
+  return SDValue();
+}
+
 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
                                      SDLoc DL, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
@@ -3134,8 +3423,9 @@
 // original vector (with an undisturbed tail policy for elements >= VL), we
 // achieve the desired result of leaving all elements untouched except the one
 // at VL-1, which is replaced with the desired value.
-SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
-                                                    SelectionDAG &DAG) const {
+SDValue
+RISCVTargetLowering::lowerRVV_INSERT_VECTOR_ELT(SDValue Op,
+                                                SelectionDAG &DAG) const {
   SDLoc DL(Op);
   MVT VecVT = Op.getSimpleValueType();
   SDValue Vec = Op.getOperand(0);
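The IsSelect path above emits an ISD::VSELECT whose mask elements are all-ones or zero; together with setBooleanVectorContents(ZeroOrNegativeOneBooleanContent) from the first hunk, that lets the vselect pattern at the end of RISCVInstrInfoP.td map such selects onto the bitwise BPICK instruction. A standalone model of that reduction, assuming the draft P-extension semantics bpick(rs1, rs2, rc) = (rs1 & rc) | (rs2 & ~rc), which the patch relies on but does not restate:

// Standalone model (not from the patch), assuming the draft-P semantics
// bpick rd, rs1, rs2, rc  =>  rd = (rs1 & rc) | (rs2 & ~rc).
// With an all-ones/all-zeros per-element condition this is exactly an
// element-wise select, which is why ZeroOrNegativeOneBooleanContent is used.
#include <cassert>
#include <cstdint>

static uint32_t bpick(uint32_t Rs1, uint32_t Rs2, uint32_t Rc) {
  return (Rs1 & Rc) | (Rs2 & ~Rc);
}

int main() {
  uint32_t A = 0x11223344; // <4 x i8> held in a GPR
  uint32_t B = 0xaabbccdd;
  uint32_t C = 0xff00ff00; // select mask: elements 1 and 3 come from A
  assert(bpick(A, B, C) == 0x11bb33dd);
  return 0;
}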
@@ -3227,6 +3517,50 @@
   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
 }

+SDValue
+RISCVTargetLowering::lowerRVP_INSERT_VECTOR_ELT(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  MVT VecTy = Op.getSimpleValueType();
+  assert((VecTy == MVT::v4i8 || VecTy == MVT::v2i16 || VecTy == MVT::v8i8 ||
+          VecTy == MVT::v4i16 || VecTy == MVT::v2i32) &&
+         "Unexpected VTs!");
+
+  SDLoc DL(Op);
+  MVT EltTy = VecTy.getVectorElementType();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  SDValue Vector = Op.getOperand(0);
+  SDValue Value = Op.getOperand(1);
+  // Check for a non-constant or out-of-range lane.
+  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+  if (!CI || CI->getZExtValue() >= VecTy.getVectorNumElements())
+    return SDValue();
+
+  SDValue Index = DAG.getConstant(CI->getZExtValue(), DL, XLenVT);
+
+  if (EltTy == MVT::i8)
+    return DAG.getNode(RISCVISD::INSERTB, DL, VecTy, {Vector, Value, Index});
+  if (EltTy == MVT::i16)
+    return DAG.getNode(RISCVISD::INSERTH, DL, VecTy, {Vector, Value, Index});
+  return DAG.getNode(RISCVISD::INSERTW, DL, VecTy, {Vector, Value, Index});
+}
+
+SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  if (Subtarget.hasStdExtV()) {
+    if (SDValue V = lowerRVV_INSERT_VECTOR_ELT(Op, DAG))
+      return V;
+  }
+
+  if (Subtarget.hasStdExtP()) {
+    if (SDValue V = lowerRVP_INSERT_VECTOR_ELT(Op, DAG))
+      return V;
+  }
+
+  return SDValue();
+}
+
 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
 // types this is done using VMV_X_S to allow us to glean information about the
@@ -7958,6 +8292,19 @@
   NODE_NAME_CASE(BDECOMPRESS)
   NODE_NAME_CASE(BDECOMPRESSW)
   NODE_NAME_CASE(BITREV)
+  NODE_NAME_CASE(INSERTB)
+  NODE_NAME_CASE(INSERTH)
+  NODE_NAME_CASE(INSERTW)
+  NODE_NAME_CASE(SWAP8)
+  NODE_NAME_CASE(SWAP16)
+  NODE_NAME_CASE(PKBB16)
+  NODE_NAME_CASE(PKBT16)
+  NODE_NAME_CASE(PKTT16)
+  NODE_NAME_CASE(PKTB16)
+  NODE_NAME_CASE(PKBB32)
+  NODE_NAME_CASE(PKBT32)
+  NODE_NAME_CASE(PKTT32)
+  NODE_NAME_CASE(PKTB32)
   NODE_NAME_CASE(VMV_V_X_VL)
   NODE_NAME_CASE(VFMV_V_F_VL)
   NODE_NAME_CASE(VMV_X_S)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -34,6 +34,9 @@
                                              SDTCisInt<2>]>;
 def SDT_RISCVReadCycleWide : SDTypeProfile<2, 0, [SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32>]>;
+def SDT_RISCVIntUnaryOp : SDTypeProfile<1, 1, [
+  SDTCisSameAs<0, 1>, SDTCisVT<0, XLenVT>
+]>;
 def SDT_RISCVIntUnaryOpW : SDTypeProfile<1, 1, [
   SDTCisSameAs<0, 1>, SDTCisVT<0, i64>
 ]>;
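The INSERTH/INSERTW nodes created above carry the unmodified vector, the new scalar and a constant lane index; the RISCVInstrInfoP.td patterns in the next file diff rewrite them as pack instructions (lane 0 -> PKTB16 vec, val; lane 1 -> PKBB16 val, vec on RV32). A standalone model of those two cases, assuming the draft P-extension semantics pktb16(a, b) = {a[31:16], b[15:0]} and pkbb16(a, b) = {a[15:0], b[15:0]}, which is not restated in the patch:

// Standalone model (not from the patch) of the RV32 inserth patterns below.
#include <cassert>
#include <cstdint>

static uint32_t pktb16(uint32_t A, uint32_t B) {
  return (A & 0xffff0000u) | (B & 0x0000ffffu); // {A.top, B.bottom}
}
static uint32_t pkbb16(uint32_t A, uint32_t B) {
  return (A << 16) | (B & 0x0000ffffu);         // {A.bottom, B.bottom}
}

int main() {
  uint32_t Vec = 0x11112222; // <2 x i16> with lane 0 in the low halfword
  uint16_t Val = 0xabcd;
  assert(pktb16(Vec, Val) == 0x1111abcd); // insert at lane 0
  assert(pkbb16(Val, Vec) == 0xabcd2222); // insert at lane 1
  return 0;
}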
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -17,8 +17,28 @@
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//

+def SDT_RISCVVecInsert : SDTypeProfile<1, 3, [
+  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, XLenVT>, SDTCisVT<3, XLenVT>
+]>;
+
 def riscv_bitrev : SDNode<"RISCVISD::BITREV", SDT_RISCVIntBinOp>;
+def riscv_insertb : SDNode<"RISCVISD::INSERTB", SDT_RISCVVecInsert>;
+def riscv_inserth : SDNode<"RISCVISD::INSERTH", SDT_RISCVVecInsert>;
+def riscv_insertw : SDNode<"RISCVISD::INSERTW", SDT_RISCVVecInsert>;
+
+def riscv_swap8 : SDNode<"RISCVISD::SWAP8", SDT_RISCVIntUnaryOp>;
+def riscv_swap16 : SDNode<"RISCVISD::SWAP16", SDT_RISCVIntUnaryOp>;
+
+def riscv_pkbb16 : SDNode<"RISCVISD::PKBB16", SDT_RISCVIntBinOp>;
+def riscv_pkbt16 : SDNode<"RISCVISD::PKBT16", SDT_RISCVIntBinOp>;
+def riscv_pktt16 : SDNode<"RISCVISD::PKTT16", SDT_RISCVIntBinOp>;
+def riscv_pktb16 : SDNode<"RISCVISD::PKTB16", SDT_RISCVIntBinOp>;
+def riscv_pkbb32 : SDNode<"RISCVISD::PKBB32", SDT_RISCVIntBinOp>;
+def riscv_pkbt32 : SDNode<"RISCVISD::PKBT32", SDT_RISCVIntBinOp>;
+def riscv_pktt32 : SDNode<"RISCVISD::PKTT32", SDT_RISCVIntBinOp>;
+def riscv_pktb32 : SDNode<"RISCVISD::PKTB32", SDT_RISCVIntBinOp>;
+
 def uimm3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<3>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<3>;
   let DecoderMethod = "decodeUImmOperand<3>";
@@ -1275,3 +1295,55 @@
 let Predicates = [HasStdExtZpn] in
 def : Pat<(riscv_bitrev GPR:$rs1, uimmlog2xlen:$imm),
           (BITREVI GPR:$rs1, uimmlog2xlen:$imm)>;
+
+// Insertion
+let Predicates = [HasStdExtZpn] in
+def : Pat<(XVEI8VT (riscv_insertb GPR:$rs1, GPR:$rs2, uimmlog2xlenbytes:$imm)),
+          (INSB GPR:$rs1, GPR:$rs2, uimmlog2xlenbytes:$imm)>;
+
+let Predicates = [HasStdExtZpn, IsRV32] in {
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 0)),
+          (PKTB16 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 1)),
+          (PKBB16 GPR:$rs2, GPR:$rs1)>;
+} // [HasStdExtZpn, IsRV32]
+
+let Predicates = [HasStdExtZpn, IsRV64] in {
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 0)),
+          (PKTB32 GPR:$rs1, (PKTB16 GPR:$rs1, GPR:$rs2))>;
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 1)),
+          (PKTB32 GPR:$rs1, (PKBB16 GPR:$rs2, GPR:$rs1))>;
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 2)),
+          (PKBB32 (PKTB16 (PKBT32 GPR:$rs1, GPR:$rs1), GPR:$rs2), GPR:$rs1)>;
+def : Pat<(XVEI16VT (riscv_inserth GPR:$rs1, GPR:$rs2, 3)),
+          (PKBB32 (PKBB16 GPR:$rs2, (PKBT32 GPR:$rs1, GPR:$rs1)), GPR:$rs1)>;
+def : Pat<(XVEI32VT (riscv_insertw GPR:$rs1, GPR:$rs2, 0)),
+          (PKTB32 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XVEI32VT (riscv_insertw GPR:$rs1, GPR:$rs2, 1)),
+          (PKBB32 GPR:$rs2, GPR:$rs1)>;
+} // [HasStdExtZpn, IsRV64]
+
+// Swap
+let Predicates = [HasStdExtZpn] in {
+def : Pat<(riscv_swap8 GPR:$rs1), (SWAP8 GPR:$rs1)>;
+def : Pat<(riscv_swap16 GPR:$rs1), (SWAP16 GPR:$rs1)>;
+} // Predicates = [HasStdExtZpn]
+
+// Packing
+let Predicates = [HasStdExtZpn] in {
+def : Pat<(riscv_pkbb16 GPR:$rs1, GPR:$rs2), (PKBB16 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pkbt16 GPR:$rs1, GPR:$rs2), (PKBT16 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pktt16 GPR:$rs1, GPR:$rs2), (PKTT16 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pktb16 GPR:$rs1, GPR:$rs2), (PKTB16 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pkbb32 GPR:$rs1, GPR:$rs2), (PKBB32 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pkbt32 GPR:$rs1, GPR:$rs2), (PKBT32 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pktt32 GPR:$rs1, GPR:$rs2), (PKTT32 GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_pktb32 GPR:$rs1, GPR:$rs2), (PKTB32 GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZpn]
+
+// vselect
+let
Predicates = [HasStdExtZpn] in { +foreach VT = [XVEI8VT, XVEI16VT, XVEI32VT] in +def : Pat<(VT (vselect GPR:$rc, GPR:$rs1, GPR:$rs2)), + (BPICK GPR:$rs1, GPR:$rs2, GPR:$rc)>; +} // Predicates = [HasStdExtZpn] diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll @@ -183,15 +183,15 @@ define i32 @andv4i8(i32 %a) nounwind { ; RV32-LABEL: andv4i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI10_0) -; RV32-NEXT: lw a1, %lo(.LCPI10_0)(a1) +; RV32-NEXT: lui a1, 16432 +; RV32-NEXT: addi a1, a1, 513 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: andv4i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI10_0) -; RV64-NEXT: ld a1, %lo(.LCPI10_0)(a1) +; RV64-NEXT: lui a1, 16432 +; RV64-NEXT: addiw a1, a1, 513 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <4 x i8> @@ -203,15 +203,15 @@ define i32 @andv2i16(i32 %a) nounwind { ; RV32-LABEL: andv2i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI11_0) -; RV32-NEXT: lw a1, %lo(.LCPI11_0)(a1) +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: addi a1, a1, 1 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: andv2i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI11_0) -; RV64-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: addiw a1, a1, 1 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <2 x i16> @@ -223,18 +223,22 @@ define i64 @andv8i8(i64 %a) nounwind { ; RV32-LABEL: andv8i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI12_0) -; RV32-NEXT: lw a2, %lo(.LCPI12_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI12_1) -; RV32-NEXT: lw a3, %lo(.LCPI12_1)(a3) +; RV32-NEXT: lui a2, 32880 +; RV32-NEXT: addi a2, a2, 1541 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: lui a2, 16432 +; RV32-NEXT: addi a2, a2, 513 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: andv8i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI12_0) -; RV64-NEXT: ld a1, %lo(.LCPI12_0)(a1) +; RV64-NEXT: lui a1, 32880 +; RV64-NEXT: addiw a1, a1, 1541 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 1027 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 513 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <8 x i8> @@ -246,18 +250,22 @@ define i64 @andv4i16(i64 %a) nounwind { ; RV32-LABEL: andv4i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI13_0) -; RV32-NEXT: lw a2, %lo(.LCPI13_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI13_1) -; RV32-NEXT: lw a3, %lo(.LCPI13_1)(a3) +; RV32-NEXT: lui a2, 64 +; RV32-NEXT: addi a2, a2, 3 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: lui a2, 32 +; RV32-NEXT: addi a2, a2, 1 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: andv4i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI13_0) -; RV64-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: addiw a1, a1, 3 +; RV64-NEXT: slli a1, a1, 15 +; RV64-NEXT: addi a1, a1, 1 +; RV64-NEXT: slli a1, a1, 17 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <4 x i16> @@ -275,8 +283,9 @@ ; ; RV64-LABEL: andv2i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI14_0) -; RV64-NEXT: ld a1, %lo(.LCPI14_0)(a1) +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <2 x i32> @@ -288,15 +297,15 @@ define i32 @orv4i8(i32 %a) nounwind { ; 
RV32-LABEL: orv4i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI15_0) -; RV32-NEXT: lw a1, %lo(.LCPI15_0)(a1) +; RV32-NEXT: lui a1, 16432 +; RV32-NEXT: addi a1, a1, 513 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: orv4i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI15_0) -; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1) +; RV64-NEXT: lui a1, 16432 +; RV64-NEXT: addiw a1, a1, 513 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <4 x i8> @@ -308,15 +317,15 @@ define i32 @orv2i16(i32 %a) nounwind { ; RV32-LABEL: orv2i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI16_0) -; RV32-NEXT: lw a1, %lo(.LCPI16_0)(a1) +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: addi a1, a1, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: orv2i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI16_0) -; RV64-NEXT: ld a1, %lo(.LCPI16_0)(a1) +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: addiw a1, a1, 1 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <2 x i16> @@ -328,18 +337,22 @@ define i64 @orv8i8(i64 %a) nounwind { ; RV32-LABEL: orv8i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI17_0) -; RV32-NEXT: lw a2, %lo(.LCPI17_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI17_1) -; RV32-NEXT: lw a3, %lo(.LCPI17_1)(a3) +; RV32-NEXT: lui a2, 32880 +; RV32-NEXT: addi a2, a2, 1541 ; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: or a0, a0, a3 +; RV32-NEXT: lui a2, 16432 +; RV32-NEXT: addi a2, a2, 513 +; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: orv8i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI17_0) -; RV64-NEXT: ld a1, %lo(.LCPI17_0)(a1) +; RV64-NEXT: lui a1, 32880 +; RV64-NEXT: addiw a1, a1, 1541 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 1027 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 513 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <8 x i8> @@ -351,18 +364,22 @@ define i64 @orv4i16(i64 %a) nounwind { ; RV32-LABEL: orv4i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI18_0) -; RV32-NEXT: lw a2, %lo(.LCPI18_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI18_1) -; RV32-NEXT: lw a3, %lo(.LCPI18_1)(a3) +; RV32-NEXT: lui a2, 64 +; RV32-NEXT: addi a2, a2, 3 ; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: or a0, a0, a3 +; RV32-NEXT: lui a2, 32 +; RV32-NEXT: addi a2, a2, 1 +; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: orv4i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI18_0) -; RV64-NEXT: ld a1, %lo(.LCPI18_0)(a1) +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: addiw a1, a1, 3 +; RV64-NEXT: slli a1, a1, 15 +; RV64-NEXT: addi a1, a1, 1 +; RV64-NEXT: slli a1, a1, 17 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <4 x i16> @@ -380,8 +397,9 @@ ; ; RV64-LABEL: orv2i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI19_0) -; RV64-NEXT: ld a1, %lo(.LCPI19_0)(a1) +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <2 x i32> @@ -393,15 +411,15 @@ define i32 @xorv4i8(i32 %a) nounwind { ; RV32-LABEL: xorv4i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI20_0) -; RV32-NEXT: lw a1, %lo(.LCPI20_0)(a1) +; RV32-NEXT: lui a1, 16432 +; RV32-NEXT: addi a1, a1, 513 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: xorv4i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI20_0) -; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1) +; RV64-NEXT: lui a1, 16432 +; RV64-NEXT: addiw a1, a1, 513 ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <4 x i8> @@ -413,15 +431,15 
@@ define i32 @xorv2i16(i32 %a) nounwind { ; RV32-LABEL: xorv2i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI21_0) -; RV32-NEXT: lw a1, %lo(.LCPI21_0)(a1) +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: addi a1, a1, 1 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: xorv2i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI21_0) -; RV64-NEXT: ld a1, %lo(.LCPI21_0)(a1) +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: addiw a1, a1, 1 ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i32 %a to <2 x i16> @@ -433,18 +451,22 @@ define i64 @xorv8i8(i64 %a) nounwind { ; RV32-LABEL: xorv8i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI22_0) -; RV32-NEXT: lw a2, %lo(.LCPI22_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI22_1) -; RV32-NEXT: lw a3, %lo(.LCPI22_1)(a3) +; RV32-NEXT: lui a2, 32880 +; RV32-NEXT: addi a2, a2, 1541 ; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: xor a0, a0, a3 +; RV32-NEXT: lui a2, 16432 +; RV32-NEXT: addi a2, a2, 513 +; RV32-NEXT: xor a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: xorv8i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI22_0) -; RV64-NEXT: ld a1, %lo(.LCPI22_0)(a1) +; RV64-NEXT: lui a1, 32880 +; RV64-NEXT: addiw a1, a1, 1541 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 1027 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: addi a1, a1, 513 ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <8 x i8> @@ -456,18 +478,22 @@ define i64 @xorv4i16(i64 %a) nounwind { ; RV32-LABEL: xorv4i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, %hi(.LCPI23_0) -; RV32-NEXT: lw a2, %lo(.LCPI23_0)(a2) -; RV32-NEXT: lui a3, %hi(.LCPI23_1) -; RV32-NEXT: lw a3, %lo(.LCPI23_1)(a3) +; RV32-NEXT: lui a2, 64 +; RV32-NEXT: addi a2, a2, 3 ; RV32-NEXT: xor a1, a1, a2 -; RV32-NEXT: xor a0, a0, a3 +; RV32-NEXT: lui a2, 32 +; RV32-NEXT: addi a2, a2, 1 +; RV32-NEXT: xor a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: xorv4i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI23_0) -; RV64-NEXT: ld a1, %lo(.LCPI23_0)(a1) +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: addiw a1, a1, 3 +; RV64-NEXT: slli a1, a1, 15 +; RV64-NEXT: addi a1, a1, 1 +; RV64-NEXT: slli a1, a1, 17 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <4 x i16> @@ -485,8 +511,9 @@ ; ; RV64-LABEL: xorv2i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI24_0) -; RV64-NEXT: ld a1, %lo(.LCPI24_0)(a1) +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: ret %tmp = bitcast i64 %a to <2 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll b/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll @@ -7,14 +7,14 @@ define i32 @buildvec_allconst_v4i8() { ; RV32-LABEL: buildvec_allconst_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI0_0) -; RV32-NEXT: lw a0, %lo(.LCPI0_0)(a0) +; RV32-NEXT: lui a0, 12320 +; RV32-NEXT: addi a0, a0, 256 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_allconst_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI0_0) -; RV64-NEXT: ld a0, %lo(.LCPI0_0)(a0) +; RV64-NEXT: lui a0, 12320 +; RV64-NEXT: addiw a0, a0, 256 ; RV64-NEXT: ret %res = bitcast <4 x i8> to i32 ret i32 %res @@ -23,14 +23,14 @@ define i32 @buildvec_undefelts_v4i8() { ; RV32-LABEL: buildvec_undefelts_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI1_0) -; RV32-NEXT: lw a0, %lo(.LCPI1_0)(a0) +; RV32-NEXT: lui a0, 12288 +; RV32-NEXT: addi a0, a0, 256 ; RV32-NEXT: ret ; ; RV64-LABEL: 
buildvec_undefelts_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI1_0) -; RV64-NEXT: ld a0, %lo(.LCPI1_0)(a0) +; RV64-NEXT: lui a0, 12288 +; RV64-NEXT: addiw a0, a0, 256 ; RV64-NEXT: ret %res = bitcast <4 x i8> to i32 ret i32 %res @@ -51,14 +51,14 @@ define i32 @buildvec_allconst_v2i16() { ; RV32-LABEL: buildvec_allconst_v2i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI3_0) -; RV32-NEXT: lw a0, %lo(.LCPI3_0)(a0) +; RV32-NEXT: lui a0, 272 +; RV32-NEXT: addi a0, a0, 13 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_allconst_v2i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-NEXT: ld a0, %lo(.LCPI3_0)(a0) +; RV64-NEXT: lui a0, 272 +; RV64-NEXT: addiw a0, a0, 13 ; RV64-NEXT: ret %res = bitcast <2 x i16> to i32 ret i32 %res @@ -67,22 +67,12 @@ define i32 @buildvec_undefelts_v2i16() { ; RV32-LABEL: buildvec_undefelts_v2i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: addi a0, zero, 9 -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_undefelts_v2i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: addi a0, zero, 9 -; RV64-NEXT: sh a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %res = bitcast <2 x i16> to i32 ret i32 %res @@ -103,16 +93,20 @@ define i64 @buildvec_allconst_v8i8() { ; RV32-LABEL: buildvec_allconst_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI6_0) -; RV32-NEXT: lw a0, %lo(.LCPI6_0)(a0) -; RV32-NEXT: lui a1, %hi(.LCPI6_1) -; RV32-NEXT: lw a1, %lo(.LCPI6_1)(a1) +; RV32-NEXT: lui a0, 12320 +; RV32-NEXT: addi a0, a0, 256 +; RV32-NEXT: lui a1, 28768 +; RV32-NEXT: addi a1, a1, 1284 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_allconst_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI6_0) -; RV64-NEXT: ld a0, %lo(.LCPI6_0)(a0) +; RV64-NEXT: lui a0, 7192 +; RV64-NEXT: addiw a0, a0, 321 +; RV64-NEXT: slli a0, a0, 17 +; RV64-NEXT: addi a0, a0, 385 +; RV64-NEXT: slli a0, a0, 17 +; RV64-NEXT: addi a0, a0, 256 ; RV64-NEXT: ret %res = bitcast <8 x i8> to i64 ret i64 %res @@ -121,16 +115,18 @@ define i64 @buildvec_undefelts_v8i8() { ; RV32-LABEL: buildvec_undefelts_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-NEXT: lw a0, %lo(.LCPI7_0)(a0) -; RV32-NEXT: lui a1, %hi(.LCPI7_1) -; RV32-NEXT: lw a1, %lo(.LCPI7_1)(a1) +; RV32-NEXT: lui a0, 12288 +; RV32-NEXT: addi a0, a0, 256 +; RV32-NEXT: lui a1, 96 +; RV32-NEXT: addi a1, a1, 1280 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_undefelts_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI7_0) -; RV64-NEXT: ld a0, %lo(.LCPI7_0)(a0) +; RV64-NEXT: lui a0, 24656 +; RV64-NEXT: addiw a0, a0, 3 +; RV64-NEXT: slli a0, a0, 24 +; RV64-NEXT: addi a0, a0, 256 ; RV64-NEXT: ret %res = bitcast <8 x i8> to i64 ret i64 %res @@ -151,16 +147,20 @@ define i64 @buildvec_allconst_v4i16() { ; RV32-LABEL: buildvec_allconst_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI9_0) -; RV32-NEXT: lw a0, %lo(.LCPI9_0)(a0) -; RV32-NEXT: lui a1, %hi(.LCPI9_1) -; RV32-NEXT: lw a1, %lo(.LCPI9_1)(a1) +; RV32-NEXT: lui a0, 272 +; RV32-NEXT: addi a0, a0, 13 +; RV32-NEXT: lui a1, 400 +; RV32-NEXT: addi a1, a1, 21 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_allconst_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI9_0) -; RV64-NEXT: ld a0, %lo(.LCPI9_0)(a0) +; RV64-NEXT: lui a0, 400 +; RV64-NEXT: addiw a0, a0, 21 +; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: addi a0, a0, 17 +; RV64-NEXT: slli a0, a0, 16 +; 
RV64-NEXT: addi a0, a0, 13 ; RV64-NEXT: ret %res = bitcast <4 x i16> to i64 ret i64 %res @@ -169,21 +169,15 @@ define i64 @buildvec_undefelts_v4i16() { ; RV32-LABEL: buildvec_undefelts_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: addi a0, zero, 9 -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: addi a0, zero, 15 -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi a1, zero, 15 ; RV32-NEXT: ret ; ; RV64-LABEL: buildvec_undefelts_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI10_0) -; RV64-NEXT: ld a0, %lo(.LCPI10_0)(a0) +; RV64-NEXT: addi a0, zero, 15 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, 9 ; RV64-NEXT: ret %res = bitcast <4 x i16> to i64 ret i64 %res @@ -210,8 +204,9 @@ ; ; RV64-LABEL: buildvec_allconst_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI12_0) -; RV64-NEXT: ld a0, %lo(.LCPI12_0)(a0) +; RV64-NEXT: addi a0, zero, 17 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, 13 ; RV64-NEXT: ret %res = bitcast <2 x i32> to i64 ret i64 %res @@ -225,12 +220,7 @@ ; ; RV64-LABEL: buildvec_undefelts_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: addi a0, zero, 9 -; RV64-NEXT: sw a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %res = bitcast <2 x i32> to i64 ret i64 %res diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll b/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll @@ -7,40 +7,12 @@ define i32 @insertelt_v4i8(i32 %x, i8 %y) { ; RV32-LABEL: insertelt_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sb a1, 12(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lb a0, 2(sp) -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lb a0, 4(sp) -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: insb a0, a1, 3 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sb a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lb a0, 10(sp) -; RV64-NEXT: sb a0, 18(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 16(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 19(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: insb a0, a1, 3 ; RV64-NEXT: ret %a = bitcast i32 %x to <4 x i8> %b = insertelement <4 x i8> %a, i8 %y, i32 3 @@ -51,36 +23,13 @@ define i32 @insertelt_v2i16_0(i32 %x, i16 %y) { ; RV32-LABEL: insertelt_v2i16_0: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a1, 12(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 10(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktb16 a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v2i16_0: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: 
ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 18(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 16(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a1, a0, a1 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %a = bitcast i32 %x to <2 x i16> %b = insertelement <2 x i16> %a, i16 %y, i32 0 @@ -91,36 +40,13 @@ define i32 @insertelt_v2i16_1(i32 %x, i16 %y) { ; RV32-LABEL: insertelt_v2i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a1, 12(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 10(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbb16 a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v2i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 16(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 18(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a1, a1, a0 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %a = bitcast i32 %x to <2 x i16> %b = insertelement <2 x i16> %a, i16 %y, i32 1 @@ -131,40 +57,12 @@ define i64 @insertelt_v8i8(i64 %x, i8 %y) { ; RV32-LABEL: insertelt_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sb a2, 12(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lh a1, 6(sp) -; RV32-NEXT: sh a1, 10(sp) -; RV32-NEXT: lb a1, 4(sp) -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: lb a1, 0(sp) -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: insb a1, a2, 1 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sb a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 22(sp) -; RV64-NEXT: lb a0, 12(sp) -; RV64-NEXT: sb a0, 20(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 21(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: insb a0, a1, 5 ; RV64-NEXT: ret %a = bitcast i64 %x to <8 x i8> %b = insertelement <8 x i8> %a, i8 %y, i64 5 @@ -175,36 +73,13 @@ define i64 @insertelt_v4i16_0(i64 %x, i16 %y) { ; RV32-LABEL: insertelt_v4i16_0: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a2, 12(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 10(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktb16 a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i16_0: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lh 
a0, 10(sp) -; RV64-NEXT: sh a0, 18(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 16(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a1, a0, a1 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %b = insertelement <4 x i16> %a, i16 %y, i64 0 @@ -215,36 +90,13 @@ define i64 @insertelt_v4i16_1(i64 %x, i16 %y) { ; RV32-LABEL: insertelt_v4i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a2, 12(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 10(sp) -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbb16 a0, a2, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 16(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 18(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a1, a1, a0 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %b = insertelement <4 x i16> %a, i16 %y, i64 1 @@ -255,36 +107,14 @@ define i64 @insertelt_v4i16_2(i64 %x, i16 %y) { ; RV32-LABEL: insertelt_v4i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a2, 12(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lh a1, 6(sp) -; RV32-NEXT: sh a1, 10(sp) -; RV32-NEXT: lh a1, 0(sp) -; RV32-NEXT: sh a1, 8(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktb16 a1, a1, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 22(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 20(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt32 a2, a0, a0 +; RV64-NEXT: pktb16 a1, a2, a1 +; RV64-NEXT: pkbb32 a0, a1, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %b = insertelement <4 x i16> %a, i16 %y, i64 2 @@ -295,36 +125,14 @@ define i64 @insertelt_v4i16_3(i64 %x, i16 %y) { ; RV32-LABEL: insertelt_v4i16_3: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sh a2, 12(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lh a1, 0(sp) -; RV32-NEXT: sh a1, 8(sp) -; RV32-NEXT: lh a1, 4(sp) -; RV32-NEXT: sh a1, 10(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbb16 a1, a2, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i16_3: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sh a1, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 4(sp) -; RV64-NEXT: sh a0, 20(sp) -; RV64-NEXT: lw a0, 0(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 22(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi 
sp, sp, 32 +; RV64-NEXT: pkbt32 a2, a0, a0 +; RV64-NEXT: pkbb16 a1, a1, a2 +; RV64-NEXT: pkbb32 a0, a1, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <4 x i16> %b = insertelement <4 x i16> %a, i16 %y, i64 3 @@ -340,18 +148,7 @@ ; ; RV64-LABEL: insertelt_v2i32_0: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sw a1, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: lw a0, 0(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %a = bitcast i64 %x to <2 x i32> %b = insertelement <2 x i32> %a, i32 %y, i64 0 @@ -367,18 +164,7 @@ ; ; RV64-LABEL: insertelt_v2i32_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sw a1, 24(sp) -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 0(sp) -; RV64-NEXT: sw a0, 16(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 20(sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb32 a0, a1, a0 ; RV64-NEXT: ret %a = bitcast i64 %x to <2 x i32> %b = insertelement <2 x i32> %a, i32 %y, i64 1 diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll b/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll @@ -7,30 +7,12 @@ define i32 @pkbb16_v2i16_1(i32 %a, i32 %b) { ; RV32-LABEL: pkbb16_v2i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbb16 a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbb16_v2i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -42,30 +24,12 @@ define i32 @pkbb16_v2i16_2(i32 %a, i32 %b) { ; RV32-LABEL: pkbb16_v2i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbb16 a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbb16_v2i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -77,41 +41,13 @@ define i64 @pkbb16_v4i16_1(i64 %a, i64 %b) { ; RV32-LABEL: pkbb16_v4i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a2, 24(sp) -; 
RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a3, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lh a0, 24(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 20(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pkbb16 a0, a2, a0 +; RV32-NEXT: pkbb16 a1, a3, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbb16_v4i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 20(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 12(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -123,41 +59,13 @@ define i64 @pkbb16_v4i16_2(i64 %a, i64 %b) { ; RV32-LABEL: pkbb16_v4i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 24(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 20(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pkbb16 a0, a0, a2 +; RV32-NEXT: pkbb16 a1, a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbb16_v4i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 20(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 12(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -169,30 +77,12 @@ define i32 @pkbt16_v2i16_1(i32 %a, i32 %b) { ; RV32-LABEL: pkbt16_v2i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbt16 a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbt16_v2i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -204,30 +94,12 @@ define i32 @pkbt16_v2i16_2(i32 %a, i32 %b) { ; RV32-LABEL: pkbt16_v2i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 
12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbt16 a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbt16_v2i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -239,41 +111,13 @@ define i64 @pkbt16_v4i16_1(i64 %a, i64 %b) { ; RV32-LABEL: pkbt16_v4i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a3, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lh a0, 24(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 22(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pkbt16 a0, a2, a0 +; RV32-NEXT: pkbt16 a1, a3, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbt16_v4i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 20(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -285,41 +129,13 @@ define i64 @pkbt16_v4i16_2(i64 %a, i64 %b) { ; RV32-LABEL: pkbt16_v4i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 24(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 22(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pkbt16 a0, a0, a2 +; RV32-NEXT: pkbt16 a1, a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: pkbt16_v4i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 20(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -331,30 +147,12 @@ define i32 @pktt16_v2i16_1(i32 %a, i32 %b) { ; RV32-LABEL: pktt16_v2i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktt16 a0, a1, a0 ; RV32-NEXT: ret ; ; 
RV64-LABEL: pktt16_v2i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -366,30 +164,12 @@ define i32 @pktt16_v2i16_2(i32 %a, i32 %b) { ; RV32-LABEL: pktt16_v2i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktt16 a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pktt16_v2i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -401,41 +181,13 @@ define i64 @pktt16_v4i16_1(i64 %a, i64 %b) { ; RV32-LABEL: pktt16_v4i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 22(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 14(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pktt16 a0, a0, a2 +; RV32-NEXT: pktt16 a1, a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: pktt16_v4i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 22(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -447,41 +199,13 @@ define i64 @pktt16_v4i16_2(i64 %a, i64 %b) { ; RV32-LABEL: pktt16_v4i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a3, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 22(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 14(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pktt16 a0, a2, a0 +; RV32-NEXT: pktt16 a1, a3, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pktt16_v4i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 22(sp) -; RV64-NEXT: sh a0, 30(sp) -; 
RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -493,30 +217,12 @@ define i32 @pktb16_v2i16_1(i32 %a, i32 %b) { ; RV32-LABEL: pktb16_v2i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktb16 a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: pktb16_v2i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -528,30 +234,12 @@ define i32 @pktb16_v2i16_2(i32 %a, i32 %b) { ; RV32-LABEL: pktb16_v2i16_2: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pktb16 a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: pktb16_v2i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -563,41 +251,13 @@ define i64 @pktb16_v4i16_1(i64 %a, i64 %b) { ; RV32-LABEL: pktb16_v4i16_1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 20(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 14(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pktb16 a0, a0, a2 +; RV32-NEXT: pktb16 a1, a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: pktb16_v4i16_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 22(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 12(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -609,41 +269,13 @@ define i64 @pktb16_v4i16_2(i64 %a, i64 %b) { ; RV32-LABEL: pktb16_v4i16_2: ; RV32: # %bb.0: -; 
RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 20(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lh a0, 14(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pktb16 a0, a0, a2 +; RV32-NEXT: pktb16 a1, a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: pktb16_v4i16_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lh a0, 22(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 12(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb16 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -660,16 +292,7 @@ ; ; RV64-LABEL: pkbb32_v2i32_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb32 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -687,16 +310,7 @@ ; ; RV64-LABEL: pkbb32_v2i32_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb32 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -714,16 +328,7 @@ ; ; RV64-LABEL: pkbt32_v2i32_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt32 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -741,16 +346,7 @@ ; ; RV64-LABEL: pkbt32_v2i32_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt32 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -768,16 +364,7 @@ ; ; RV64-LABEL: pktt32_v2i32_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt32 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x 
i32> @@ -794,16 +381,7 @@ ; ; RV64-LABEL: pktt32_v2i32_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktt32 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -820,16 +398,7 @@ ; ; RV64-LABEL: pktb32_v2i32_1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb32 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> @@ -846,16 +415,7 @@ ; ; RV64-LABEL: pktb32_v2i32_2: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pktb32 a0, a0, a1 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll b/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll @@ -9,36 +9,38 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 6(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: mv a2, zero +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: lb a0, 12(sp) +; RV32-NEXT: lb a1, 14(sp) +; RV32-NEXT: lb a3, 10(sp) +; RV32-NEXT: lb a4, 15(sp) +; RV32-NEXT: insb a2, a0, 0 +; RV32-NEXT: insb a2, a1, 1 +; RV32-NEXT: insb a2, a3, 2 +; RV32-NEXT: insb a2, a4, 3 +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: shuffle_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lb a0, 19(sp) -; RV64-NEXT: sb a0, 27(sp) -; RV64-NEXT: lb a0, 10(sp) -; RV64-NEXT: sb a0, 26(sp) -; RV64-NEXT: lb a0, 18(sp) -; RV64-NEXT: sb a0, 25(sp) -; RV64-NEXT: lb a0, 16(sp) -; RV64-NEXT: sb a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: mv a2, zero +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: sd a1, 0(sp) +; RV64-NEXT: lb a0, 8(sp) +; RV64-NEXT: lb a1, 10(sp) +; RV64-NEXT: lb a3, 2(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a2, a0, 0 +; RV64-NEXT: insb a2, a1, 1 +; RV64-NEXT: insb a2, a3, 2 +; RV64-NEXT: insb a2, a4, 3 +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -50,30 +52,12 @@ define i32 @shuffle_v2i16(i32 %a, i32 %b) { ; RV32-LABEL: shuffle_v2i16: ; RV32: # %bb.0: -; RV32-NEXT: 
addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 6(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: pkbt16 a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: shuffle_v2i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 16(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 10(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbt16 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <2 x i16> %tmp2 = bitcast i32 %b to <2 x i16> @@ -85,75 +69,63 @@ define i64 @shuffle_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: shuffle_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw a0, 32(sp) -; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: lb a0, 35(sp) -; RV32-NEXT: sb a0, 39(sp) -; RV32-NEXT: lb a0, 31(sp) -; RV32-NEXT: sb a0, 37(sp) -; RV32-NEXT: lb a0, 32(sp) -; RV32-NEXT: sb a0, 36(sp) -; RV32-NEXT: lb a0, 23(sp) -; RV32-NEXT: sb a0, 27(sp) -; RV32-NEXT: lb a0, 29(sp) -; RV32-NEXT: sb a0, 25(sp) -; RV32-NEXT: lb a0, 21(sp) -; RV32-NEXT: sb a0, 24(sp) -; RV32-NEXT: lb a0, 41(sp) -; RV32-NEXT: sb a0, 46(sp) -; RV32-NEXT: lb a0, 40(sp) -; RV32-NEXT: sb a0, 44(sp) -; RV32-NEXT: lw a0, 36(sp) -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lw a0, 24(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lw a0, 44(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lb a0, 42(sp) -; RV32-NEXT: sb a0, 18(sp) -; RV32-NEXT: lb a0, 15(sp) -; RV32-NEXT: sb a0, 19(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lb a0, 7(sp) -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: lb a0, 2(sp) -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 8(sp) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a3, 4(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 12(sp) +; RV32-NEXT: mv a1, zero +; RV32-NEXT: mv a3, zero +; RV32-NEXT: mv a4, zero +; RV32-NEXT: insb a1, a0, 0 +; RV32-NEXT: lb a0, 13(sp) +; RV32-NEXT: lb a5, 1(sp) +; RV32-NEXT: lb a6, 5(sp) +; RV32-NEXT: lb a7, 3(sp) +; RV32-NEXT: insb a1, a0, 2 +; RV32-NEXT: insb a3, a5, 0 +; RV32-NEXT: insb a3, a6, 1 +; RV32-NEXT: insb a3, a7, 3 +; RV32-NEXT: lui a0, 1044496 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: lb a6, 8(sp) +; RV32-NEXT: lb a7, 7(sp) +; RV32-NEXT: lb a5, 11(sp) +; RV32-NEXT: bpick a1, a3, a1, a0 +; RV32-NEXT: insb a4, a6, 0 +; RV32-NEXT: insb a4, a7, 1 +; RV32-NEXT: insb a4, a5, 3 +; RV32-NEXT: bpick a0, a4, a2, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: shuffle_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lb a0, 23(sp) -; RV64-NEXT: sb a0, 31(sp) -; RV64-NEXT: lb a0, 9(sp) -; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: mv a2, zero +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: sd a1, 0(sp) +; RV64-NEXT: lb a0, 8(sp) +; RV64-NEXT: lb a1, 7(sp) +; RV64-NEXT: lb a3, 2(sp) +; RV64-NEXT: lb a4, 11(sp) +; RV64-NEXT: insb a2, a0, 
0 +; RV64-NEXT: insb a2, a1, 1 +; RV64-NEXT: insb a2, a3, 2 +; RV64-NEXT: insb a2, a4, 3 ; RV64-NEXT: lb a0, 13(sp) -; RV64-NEXT: sb a0, 29(sp) -; RV64-NEXT: lb a0, 21(sp) -; RV64-NEXT: sb a0, 28(sp) -; RV64-NEXT: lb a0, 19(sp) -; RV64-NEXT: sb a0, 27(sp) -; RV64-NEXT: lb a0, 10(sp) -; RV64-NEXT: sb a0, 26(sp) -; RV64-NEXT: lb a0, 15(sp) -; RV64-NEXT: sb a0, 25(sp) -; RV64-NEXT: lb a0, 16(sp) -; RV64-NEXT: sb a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: lb a1, 5(sp) +; RV64-NEXT: lb a3, 1(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: insb a2, a0, 4 +; RV64-NEXT: insb a2, a1, 5 +; RV64-NEXT: insb a2, a3, 6 +; RV64-NEXT: insb a2, a4, 7 +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> @@ -165,41 +137,31 @@ define i64 @shuffle_v4i16(i64 %a, i64 %b) { ; RV32-LABEL: shuffle_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a3, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: lh a0, 12(sp) -; RV32-NEXT: sh a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lh a0, 22(sp) -; RV32-NEXT: sh a0, 28(sp) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 28(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: pkbb16 a0, a3, a0 +; RV32-NEXT: pktt16 a1, a2, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: shuffle_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 30(sp) -; RV64-NEXT: lh a0, 14(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 20(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: lh a1, 12(sp) +; RV64-NEXT: pktb16 a0, zero, a0 +; RV64-NEXT: pktb32 a0, zero, a0 +; RV64-NEXT: pkbb16 a1, a1, a0 +; RV64-NEXT: lh a2, 6(sp) +; RV64-NEXT: pktb32 a0, a0, a1 +; RV64-NEXT: pkbt32 a1, a0, a0 +; RV64-NEXT: lh a3, 10(sp) +; RV64-NEXT: pktb16 a1, a1, a2 +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: pkbt32 a1, a0, a0 +; RV64-NEXT: pkbb16 a1, a3, a1 +; RV64-NEXT: pkbb32 a0, a1, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16> @@ -216,16 +178,7 @@ ; ; RV64-LABEL: shuffle_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 16(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lw a0, 8(sp) -; RV64-NEXT: sw a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: pkbb32 a0, a1, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <2 x i32> %tmp2 = bitcast i64 %b to <2 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll b/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll @@ -7,36 +7,14 @@ define i32 @swap_byte_v4i8(i32 %a, i32 %b) { ; RV32-LABEL: swap_byte_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lb a0, 8(sp) -; 
RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a0, a0 +; RV32-NEXT: swap16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 +; RV64-NEXT: swap16 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -48,54 +26,18 @@ define i64 @swap_byte_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: swap_byte_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lb a0, 0(sp) -; RV32-NEXT: sb a0, 7(sp) -; RV32-NEXT: lb a0, 1(sp) -; RV32-NEXT: sb a0, 6(sp) -; RV32-NEXT: lb a0, 2(sp) -; RV32-NEXT: sb a0, 5(sp) -; RV32-NEXT: lb a0, 3(sp) -; RV32-NEXT: sb a0, 4(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 4(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a1, a1 +; RV32-NEXT: swap16 a2, a1 +; RV32-NEXT: swap8 a0, a0 +; RV32-NEXT: swap16 a1, a0 +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: lb a0, 4(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 5(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 6(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 7(sp) -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 +; RV64-NEXT: swap16 a0, a0 +; RV64-NEXT: pkbt32 a0, a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> @@ -107,36 +49,12 @@ define i32 @swap_byte_within_halfword_v4i8(i32 %a, i32 %b) { ; RV32-LABEL: swap_byte_within_halfword_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_within_halfword_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 8(sp) -; 
RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -148,54 +66,13 @@ define i64 @swap_byte_within_halfword_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: swap_byte_within_halfword_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lb a0, 2(sp) -; RV32-NEXT: sb a0, 7(sp) -; RV32-NEXT: lb a0, 3(sp) -; RV32-NEXT: sb a0, 6(sp) -; RV32-NEXT: lb a0, 0(sp) -; RV32-NEXT: sb a0, 5(sp) -; RV32-NEXT: lb a0, 1(sp) -; RV32-NEXT: sb a0, 4(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a0, a0 +; RV32-NEXT: swap8 a1, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_within_halfword_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 6(sp) -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: lb a0, 7(sp) -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: lb a0, 4(sp) -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: lb a0, 5(sp) -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> @@ -207,28 +84,12 @@ define i32 @swap_halfword_within_word_v4i8(i32 %a, i32 %b) { ; RV32-LABEL: swap_halfword_within_word_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_halfword_within_word_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 10(sp) -; RV64-NEXT: lh a0, 2(sp) -; RV64-NEXT: sh a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap16 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -240,38 +101,13 @@ define i64 @swap_halfword_within_word_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: swap_halfword_within_word_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lh a0, 0(sp) -; RV32-NEXT: sh a0, 6(sp) -; RV32-NEXT: lh a0, 2(sp) -; RV32-NEXT: sh a0, 4(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap16 a0, a0 +; RV32-NEXT: swap16 a1, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_halfword_within_word_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lh a0, 4(sp) -; RV64-NEXT: sh a0, 14(sp) 
-; RV64-NEXT: lh a0, 6(sp) -; RV64-NEXT: sh a0, 12(sp) -; RV64-NEXT: lh a0, 0(sp) -; RV64-NEXT: sh a0, 10(sp) -; RV64-NEXT: lh a0, 2(sp) -; RV64-NEXT: sh a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap16 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> @@ -285,36 +121,14 @@ define i32 @swap_byte_within_halfword_word_v4i8(i32 %a, i32 %b) { ; RV32-LABEL: swap_byte_within_halfword_word_v4i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a0, a0 +; RV32-NEXT: swap16 a0, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_within_halfword_word_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 +; RV64-NEXT: swap16 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -326,54 +140,16 @@ define i64 @swap_byte_within_halfword_word_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: swap_byte_within_halfword_word_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lb a0, 8(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lb a0, 0(sp) -; RV32-NEXT: sb a0, 7(sp) -; RV32-NEXT: lb a0, 1(sp) -; RV32-NEXT: sb a0, 6(sp) -; RV32-NEXT: lb a0, 2(sp) -; RV32-NEXT: sb a0, 5(sp) -; RV32-NEXT: lb a0, 3(sp) -; RV32-NEXT: sb a0, 4(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: swap8 a0, a0 +; RV32-NEXT: swap16 a0, a0 +; RV32-NEXT: swap8 a1, a1 +; RV32-NEXT: swap16 a1, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: swap_byte_within_halfword_word_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd a0, 0(sp) -; RV64-NEXT: lb a0, 4(sp) -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: lb a0, 5(sp) -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: lb a0, 6(sp) -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: lb a0, 7(sp) -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: lb a0, 0(sp) -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: lb a0, 1(sp) -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: lb a0, 2(sp) -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: lb a0, 3(sp) -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: swap8 a0, a0 +; RV64-NEXT: swap16 a0, a0 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll b/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll --- a/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll @@ -9,38 +9,18 @@ define i32 @vselect_v4i8(i32 %a, i32 %b) { ; RV32-LABEL: vselect_v4i8: ; 
RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lb a0, 11(sp) -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: lb a0, 6(sp) -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: lb a0, 9(sp) -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: lb a0, 4(sp) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lui a2, 4080 +; RV32-NEXT: addi a2, a2, 255 +; RV32-NEXT: bpick a0, a0, a1, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_v4i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lb a0, 19(sp) -; RV64-NEXT: sb a0, 27(sp) -; RV64-NEXT: lb a0, 10(sp) -; RV64-NEXT: sb a0, 26(sp) -; RV64-NEXT: lb a0, 17(sp) -; RV64-NEXT: sb a0, 25(sp) -; RV64-NEXT: lb a0, 8(sp) -; RV64-NEXT: sb a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: lui a2, 1048560 +; RV64-NEXT: addiw a2, a2, 255 +; RV64-NEXT: slli a2, a2, 16 +; RV64-NEXT: addi a2, a2, 255 +; RV64-NEXT: bpick a0, a0, a1, a2 ; RV64-NEXT: ret %tmp1 = bitcast i32 %a to <4 x i8> %tmp2 = bitcast i32 %b to <4 x i8> @@ -52,49 +32,23 @@ define i64 @vselect_v8i8(i64 %a, i64 %b) { ; RV32-LABEL: vselect_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a3, 8(sp) -; RV32-NEXT: lh a0, 26(sp) -; RV32-NEXT: sh a0, 30(sp) -; RV32-NEXT: lb a0, 21(sp) -; RV32-NEXT: sb a0, 29(sp) -; RV32-NEXT: lb a0, 24(sp) -; RV32-NEXT: sb a0, 28(sp) -; RV32-NEXT: lb a0, 15(sp) -; RV32-NEXT: sb a0, 19(sp) -; RV32-NEXT: lb a0, 10(sp) -; RV32-NEXT: sb a0, 18(sp) -; RV32-NEXT: lh a0, 8(sp) -; RV32-NEXT: sh a0, 16(sp) -; RV32-NEXT: lw a0, 28(sp) -; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lui a4, 4096 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: bpick a1, a3, a1, a4 +; RV32-NEXT: lui a3, 1048560 +; RV32-NEXT: addi a3, a3, 255 +; RV32-NEXT: bpick a0, a0, a2, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a0, 16(sp) -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lb a0, 23(sp) -; RV64-NEXT: sb a0, 31(sp) -; RV64-NEXT: lb a0, 14(sp) -; RV64-NEXT: sb a0, 30(sp) -; RV64-NEXT: lh a0, 12(sp) -; RV64-NEXT: sh a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lb a0, 9(sp) -; RV64-NEXT: sb a0, 25(sp) -; RV64-NEXT: lb a0, 16(sp) -; RV64-NEXT: sb a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: lui a2, 1044480 +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: slli a2, a2, 16 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: slli a2, a2, 16 +; RV64-NEXT: addi a2, a2, 255 +; RV64-NEXT: bpick a0, a0, a1, a2 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <8 x i8> %tmp2 = bitcast i64 %b to <8 x i8> @@ -106,33 +60,15 @@ define i64 @vselect_v4i16(i64 %a, i64 %b) { ; RV32-LABEL: vselect_v4i16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw a2, 8(sp) -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: lh a0, 10(sp) -; RV32-NEXT: sh a0, 14(sp) -; RV32-NEXT: lh a0, 4(sp) -; RV32-NEXT: sh a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: pktb16 a0, a2, a0 ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 
-; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd a1, 16(sp) -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: lw a0, 20(sp) -; RV64-NEXT: sw a0, 28(sp) -; RV64-NEXT: lh a0, 18(sp) -; RV64-NEXT: sh a0, 26(sp) -; RV64-NEXT: lh a0, 8(sp) -; RV64-NEXT: sh a0, 24(sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -1 +; RV64-NEXT: bpick a0, a0, a1, a2 ; RV64-NEXT: ret %tmp1 = bitcast i64 %a to <4 x i16> %tmp2 = bitcast i64 %b to <4 x i16>