diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -203,8 +203,11 @@ VMCLR_VL, VMSET_VL, - // Matches the semantics of vrgather.vx with an extra operand for VL. + // Matches the semantics of vrgather.vx and vrgather.vv with an extra operand + // for VL. VRGATHER_VX_VL, + VRGATHER_VV_VL, + VRGATHEREI16_VV_VL, // Vector sign/zero extend with additional mask & VL operands. VSEXT_VL, @@ -446,6 +449,7 @@ SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -472,6 +472,8 @@ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + + setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); } // Expand various CCs to best match the RVV ISA, which natively supports UNE @@ -509,6 +511,8 @@ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + + setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); }; if (Subtarget.hasStdExtZfh()) @@ -1528,6 +1532,8 @@ return lowerINSERT_SUBVECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return lowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::VECTOR_REVERSE: + return lowerVECTOR_REVERSE(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: @@ -2793,6 +2799,84 @@ return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); } +// Implement vector_reverse using vrgather.vv with indices determined by +// subtracting the id of each element from (VLMAX-1). This will convert +// the indices like so: +// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). +// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. +SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VecVT = Op.getSimpleValueType(); + unsigned EltSize = VecVT.getScalarSizeInBits(); + unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); + + unsigned MaxVLMAX = 0; + unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); + if (VectorBitsMax != 0) + MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; + + unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; + MVT IntVT = VecVT.changeVectorElementTypeToInteger(); + + // If this is SEW=8 and VLMAX is unknown or more than 256, we need + // to use vrgatherei16.vv. + // TODO: It's also possible to use vrgatherei16.vv for other types to + // decrease register width for the index calculation. + if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) { + // If this is LMUL=8, we have to split before can use vrgatherei16.vv. + // Reverse each half, then reassemble them in reverse order. + // NOTE: It's also possible that after splitting that VLMAX no longer + // requires vrgatherei16.vv. 
+ if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); + Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); + Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); + // Reassemble the low and high pieces reversed. + // FIXME: This is a CONCAT_VECTORS. + SDValue Res = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, + DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); + } + + // Just promote the int type to i16 which will double the LMUL. + IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); + GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; + } + + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Mask, VL; + std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); + + // Calculate VLMAX-1 for the desired SEW. + unsigned MinElts = VecVT.getVectorMinNumElements(); + SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, + DAG.getConstant(MinElts, DL, XLenVT)); + SDValue VLMinus1 = + DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); + + // Splat VLMAX-1 taking care to handle SEW==64 on RV32. + bool IsRV32E64 = + !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; + SDValue SplatVL; + if (!IsRV32E64) + SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); + else + SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); + + SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); + SDValue Indices = + DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); + + return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); +} + SDValue RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const { @@ -5907,6 +5991,8 @@ NODE_NAME_CASE(VMCLR_VL) NODE_NAME_CASE(VMSET_VL) NODE_NAME_CASE(VRGATHER_VX_VL) + NODE_NAME_CASE(VRGATHER_VV_VL) + NODE_NAME_CASE(VRGATHEREI16_VV_VL) NODE_NAME_CASE(VSEXT_VL) NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VLE_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -142,6 +142,24 @@ SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>]>>; +def riscv_vrgather_vv_vl : SDNode<"RISCVISD::VRGATHER_VV_VL", + SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisInt<2>, + SDTCisSameNumEltsAs<0, 2>, + SDTCisSameSizeAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>, + SDTCisVT<4, XLenVT>]>>; +def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL", + SDTypeProfile<1, 4, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisInt<2>, + SDTCVecEltisVT<2, i16>, + SDTCisSameNumEltsAs<0, 2>, + SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>, + SDTCisVT<4, XLenVT>]>>; def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDTypeProfile<1, 4, [SDTCisVec<0>, @@ -995,6 +1013,12 @@ (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX) vti.RegClass:$merge, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.SEW)>; + def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, + (vti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX) + vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.SEW)>; def :
Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, (vti.Mask true_mask), (XLenVT (VLOp GPR:$vl)))), @@ -1005,6 +1029,22 @@ (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX) vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>; + + // emul = lmul * 16 / sew + defvar vlmul = vti.LMul; + defvar octuple_lmul = octuple_from_str<vlmul.MX>.ret; + defvar octuple_emul = !srl(!mul(octuple_lmul, 16), shift_amount<vti.SEW>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emul_str = octuple_to_str<octuple_emul>.ret; + defvar ivti = !cast<VTypeInfo>("VI16" # emul_str); + defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str; + def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, + (ivti.Vector ivti.RegClass:$rs1), + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst) + vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.SEW)>; + } } } // Predicates = [HasStdExtV] @@ -1019,6 +1059,13 @@ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$merge, (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.SEW)>; + defvar ivti = GetIntVTypeInfo<vti>.Vti; + def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, + (ivti.Vector vti.RegClass:$rs1), + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX) + vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.SEW)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, (vti.Mask true_mask), (XLenVT (VLOp GPR:$vl)))), @@ -1029,6 +1076,21 @@ (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX) vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>; + + defvar vlmul = vti.LMul; + defvar octuple_lmul = octuple_from_str<vlmul.MX>.ret; + defvar octuple_emul = !srl(!mul(octuple_lmul, 16), shift_amount<vti.SEW>.val); + if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { + defvar emul_str = octuple_to_str<octuple_emul>.ret; + defvar ivti = !cast<VTypeInfo>("VI16" # emul_str); + defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str; + def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, + (ivti.Vector ivti.RegClass:$rs1), + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(inst) + vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.SEW)>; + } } } // Predicates = [HasStdExtV, HasStdExtF] diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -0,0 +1,1113 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256 +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512 +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256 +; RUN: llc -mtriple=riscv64
-mattr=+m,+experimental-v,+f,+d,+experimental-zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512 + +; +; VECTOR_REVERSE - masks +; +; FIXME: Support for mask vectors + +;define @reverse_nxv2i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv2i1( %a) +; ret %res +;} +; +;define @reverse_nxv4i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv4i1( %a) +; ret %res +;} +; +;define @reverse_nxv8i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv8i1( %a) +; ret %res +;} +; +;define @reverse_nxv16i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv16i1( %a) +; ret %res +;} +; +;define @reverse_nxv32i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv32i1( %a) +; ret %res +;} +; +;define @reverse_nxv64i1( %a) { +; %res = call @llvm.experimental.vector.reverse.nxv64i1( %a) +; ret %res +;} + + +; +; VECTOR_REVERSE - integer +; + +define @reverse_nxv1i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv1i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v25 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV32-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv1i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: srli a0, a0, 3 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; RV32-BITS-256-NEXT: vid.v v25 +; RV32-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-256-NEXT: vmv1r.v v8, v25 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv1i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: srli a0, a0, 3 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; RV32-BITS-512-NEXT: vid.v v25 +; RV32-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-512-NEXT: vmv1r.v v8, v25 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv1i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v25 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV64-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv1i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: srli a0, a0, 3 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; RV64-BITS-256-NEXT: vid.v v25 +; RV64-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-256-NEXT: vmv1r.v v8, v25 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv1i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: srli a0, a0, 3 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: 
vsetvli a1, zero, e8,mf8,ta,mu +; RV64-BITS-512-NEXT: vid.v v25 +; RV64-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-512-NEXT: vmv1r.v v8, v25 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1i8( %a) + ret %res +} + +define @reverse_nxv2i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv2i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v25 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV32-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv2i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: srli a0, a0, 2 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; RV32-BITS-256-NEXT: vid.v v25 +; RV32-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-256-NEXT: vmv1r.v v8, v25 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv2i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: srli a0, a0, 2 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; RV32-BITS-512-NEXT: vid.v v25 +; RV32-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-512-NEXT: vmv1r.v v8, v25 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v25 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV64-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv2i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: srli a0, a0, 2 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; RV64-BITS-256-NEXT: vid.v v25 +; RV64-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-256-NEXT: vmv1r.v v8, v25 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv2i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: srli a0, a0, 2 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; RV64-BITS-512-NEXT: vid.v v25 +; RV64-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-512-NEXT: vmv1r.v v8, v25 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2i8( %a) + ret %res +} + +define @reverse_nxv4i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv4i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v25 +; 
RV32-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV32-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv4i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: srli a0, a0, 1 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; RV32-BITS-256-NEXT: vid.v v25 +; RV32-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-256-NEXT: vmv1r.v v8, v25 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv4i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: srli a0, a0, 1 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; RV32-BITS-512-NEXT: vid.v v25 +; RV32-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-512-NEXT: vmv1r.v v8, v25 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v25 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV64-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv4i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: srli a0, a0, 1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; RV64-BITS-256-NEXT: vid.v v25 +; RV64-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-256-NEXT: vmv1r.v v8, v25 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv4i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: srli a0, a0, 1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; RV64-BITS-512-NEXT: vid.v v25 +; RV64-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-512-NEXT: vmv1r.v v8, v25 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4i8( %a) + ret %res +} + +define @reverse_nxv8i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv8i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v26 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v26, v26, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV32-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv8i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV32-BITS-256-NEXT: vid.v v25 +; RV32-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-256-NEXT: vmv1r.v v8, v25 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv8i8: +; RV32-BITS-512: # %bb.0: 
+; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV32-BITS-512-NEXT: vid.v v25 +; RV32-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV32-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV32-BITS-512-NEXT: vmv1r.v v8, v25 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v26 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v26, v26, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v25, v8, v26 +; RV64-BITS-UNKNOWN-NEXT: vmv1r.v v8, v25 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv8i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV64-BITS-256-NEXT: vid.v v25 +; RV64-BITS-256-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-256-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-256-NEXT: vmv1r.v v8, v25 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv8i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; RV64-BITS-512-NEXT: vid.v v25 +; RV64-BITS-512-NEXT: vrsub.vx v26, v25, a0 +; RV64-BITS-512-NEXT: vrgather.vv v25, v8, v26 +; RV64-BITS-512-NEXT: vmv1r.v v8, v25 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8i8( %a) + ret %res +} + +define @reverse_nxv16i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv16i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v28 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v28, v28, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v26, v8, v28 +; RV32-BITS-UNKNOWN-NEXT: vmv2r.v v8, v26 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv16i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; RV32-BITS-256-NEXT: vid.v v26 +; RV32-BITS-256-NEXT: vrsub.vx v28, v26, a0 +; RV32-BITS-256-NEXT: vrgather.vv v26, v8, v28 +; RV32-BITS-256-NEXT: vmv2r.v v8, v26 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv16i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; RV32-BITS-512-NEXT: vid.v v26 +; RV32-BITS-512-NEXT: vrsub.vx v28, v26, a0 +; RV32-BITS-512-NEXT: vrgather.vv v26, v8, v28 +; RV32-BITS-512-NEXT: vmv2r.v v8, v26 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v28 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v28, v28, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv 
v26, v8, v28 +; RV64-BITS-UNKNOWN-NEXT: vmv2r.v v8, v26 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv16i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; RV64-BITS-256-NEXT: vid.v v26 +; RV64-BITS-256-NEXT: vrsub.vx v28, v26, a0 +; RV64-BITS-256-NEXT: vrgather.vv v26, v8, v28 +; RV64-BITS-256-NEXT: vmv2r.v v8, v26 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv16i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; RV64-BITS-512-NEXT: vid.v v26 +; RV64-BITS-512-NEXT: vrsub.vx v28, v26, a0 +; RV64-BITS-512-NEXT: vrgather.vv v26, v8, v28 +; RV64-BITS-512-NEXT: vmv2r.v v8, v26 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv16i8( %a) + ret %res +} + +define @reverse_nxv32i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv32i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v8, v28 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv32i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV32-BITS-256-NEXT: vid.v v28 +; RV32-BITS-256-NEXT: vrsub.vx v12, v28, a0 +; RV32-BITS-256-NEXT: vrgather.vv v28, v8, v12 +; RV32-BITS-256-NEXT: vmv4r.v v8, v28 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv32i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV32-BITS-512-NEXT: vid.v v28 +; RV32-BITS-512-NEXT: vrsub.vx v12, v28, a0 +; RV32-BITS-512-NEXT: vrgather.vv v28, v8, v12 +; RV32-BITS-512-NEXT: vmv4r.v v8, v28 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv32i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v8, v28 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv32i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV64-BITS-256-NEXT: vid.v v28 +; RV64-BITS-256-NEXT: vrsub.vx v12, v28, a0 +; RV64-BITS-256-NEXT: vrgather.vv v28, v8, v12 +; RV64-BITS-256-NEXT: vmv4r.v v8, v28 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv32i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: 
slli a0, a0, 2 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV64-BITS-512-NEXT: vid.v v28 +; RV64-BITS-512-NEXT: vrsub.vx v12, v28, a0 +; RV64-BITS-512-NEXT: vrgather.vv v28, v8, v12 +; RV64-BITS-512-NEXT: vmv4r.v v8, v28 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv32i8( %a) + ret %res +} + +define @reverse_nxv64i8( %a) { +; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i8: +; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 +; RV32-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 +; RV32-BITS-UNKNOWN-NEXT: ret +; +; RV32-BITS-256-LABEL: reverse_nxv64i8: +; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; RV32-BITS-256-NEXT: vid.v v16 +; RV32-BITS-256-NEXT: vrsub.vx v24, v16, a0 +; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 +; RV32-BITS-256-NEXT: vmv8r.v v8, v16 +; RV32-BITS-256-NEXT: ret +; +; RV32-BITS-512-LABEL: reverse_nxv64i8: +; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV32-BITS-512-NEXT: vid.v v28 +; RV32-BITS-512-NEXT: vrsub.vx v28, v28, a0 +; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v28 +; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v28 +; RV32-BITS-512-NEXT: vmv8r.v v8, v16 +; RV32-BITS-512-NEXT: ret +; +; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i8: +; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 +; RV64-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv64i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; RV64-BITS-256-NEXT: vid.v v16 +; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 +; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 +; RV64-BITS-256-NEXT: vmv8r.v v8, v16 +; RV64-BITS-256-NEXT: ret +; +; RV64-BITS-512-LABEL: reverse_nxv64i8: +; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; RV64-BITS-512-NEXT: vid.v v28 +; RV64-BITS-512-NEXT: vrsub.vx v28, v28, a0 +; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v28 +; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v28 +; RV64-BITS-512-NEXT: vmv8r.v v8, v16 +; RV64-BITS-512-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv64i8( %a) + ret %res +} + +define @reverse_nxv1i16( %a) { +; CHECK-LABEL: 
reverse_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1i16( %a) + ret %res +} + +define @reverse_nxv2i16( %a) { +; CHECK-LABEL: reverse_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2i16( %a) + ret %res +} + +define @reverse_nxv4i16( %a) { +; CHECK-LABEL: reverse_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4i16( %a) + ret %res +} + +define @reverse_nxv8i16( %a) { +; CHECK-LABEL: reverse_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8i16( %a) + ret %res +} + +define @reverse_nxv16i16( %a) { +; CHECK-LABEL: reverse_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv16i16( %a) + ret %res +} + +define @reverse_nxv32i16( %a) { +; CHECK-LABEL: reverse_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv32i16( %a) + ret %res +} + +define @reverse_nxv1i32( %a) { +; CHECK-LABEL: reverse_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1i32( %a) + ret %res +} + +define @reverse_nxv2i32( %a) { +; CHECK-LABEL: reverse_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2i32( %a) + ret %res +} + +define 
@reverse_nxv4i32( %a) { +; CHECK-LABEL: reverse_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4i32( %a) + ret %res +} + +define @reverse_nxv8i32( %a) { +; CHECK-LABEL: reverse_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8i32( %a) + ret %res +} + +define @reverse_nxv16i32( %a) { +; CHECK-LABEL: reverse_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv16i32( %a) + ret %res +} + +define @reverse_nxv1i64( %a) { +; CHECK-LABEL: reverse_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1i64( %a) + ret %res +} + +define @reverse_nxv2i64( %a) { +; CHECK-LABEL: reverse_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2i64( %a) + ret %res +} + +define @reverse_nxv4i64( %a) { +; CHECK-LABEL: reverse_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4i64( %a) + ret %res +} + +define @reverse_nxv8i64( %a) { +; CHECK-LABEL: reverse_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8i64( %a) + ret %res +} + +; +; VECTOR_REVERSE - floating point +; + +define @reverse_nxv1f16( %a) { +; CHECK-LABEL: reverse_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call 
@llvm.experimental.vector.reverse.nxv1f16( %a) + ret %res +} + +define @reverse_nxv2f16( %a) { +; CHECK-LABEL: reverse_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2f16( %a) + ret %res +} + +define @reverse_nxv4f16( %a) { +; CHECK-LABEL: reverse_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4f16( %a) + ret %res +} + +define @reverse_nxv8f16( %a) { +; CHECK-LABEL: reverse_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8f16( %a) + ret %res +} + +define @reverse_nxv16f16( %a) { +; CHECK-LABEL: reverse_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv16f16( %a) + ret %res +} + +define @reverse_nxv32f16( %a) { +; CHECK-LABEL: reverse_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv32f16( %a) + ret %res +} + +define @reverse_nxv1f32( %a) { +; CHECK-LABEL: reverse_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1f32( %a) + ret %res +} + +define @reverse_nxv2f32( %a) { +; CHECK-LABEL: reverse_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2f32( %a) + ret %res +} + +define @reverse_nxv4f32( %a) { +; CHECK-LABEL: reverse_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; 
CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4f32( %a) + ret %res +} + +define @reverse_nxv8f32( %a) { +; CHECK-LABEL: reverse_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8f32( %a) + ret %res +} + +define @reverse_nxv16f32( %a) { +; CHECK-LABEL: reverse_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv16f32( %a) + ret %res +} + +define @reverse_nxv1f64( %a) { +; CHECK-LABEL: reverse_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: vid.v v25 +; CHECK-NEXT: vrsub.vx v26, v25, a0 +; CHECK-NEXT: vrgather.vv v25, v8, v26 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv1f64( %a) + ret %res +} + +define @reverse_nxv2f64( %a) { +; CHECK-LABEL: reverse_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: vid.v v26 +; CHECK-NEXT: vrsub.vx v28, v26, a0 +; CHECK-NEXT: vrgather.vv v26, v8, v28 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv2f64( %a) + ret %res +} + +define @reverse_nxv4f64( %a) { +; CHECK-LABEL: reverse_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: vid.v v28 +; CHECK-NEXT: vrsub.vx v12, v28, a0 +; CHECK-NEXT: vrgather.vv v28, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv4f64( %a) + ret %res +} + +define @reverse_nxv8f64( %a) { +; CHECK-LABEL: reverse_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a0 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.reverse.nxv8f64( %a) + ret %res +} + +declare @llvm.experimental.vector.reverse.nxv2i1() +declare @llvm.experimental.vector.reverse.nxv4i1() +declare @llvm.experimental.vector.reverse.nxv8i1() +declare @llvm.experimental.vector.reverse.nxv16i1() +declare @llvm.experimental.vector.reverse.nxv32i1() +declare @llvm.experimental.vector.reverse.nxv64i1() +declare @llvm.experimental.vector.reverse.nxv1i8() +declare @llvm.experimental.vector.reverse.nxv2i8() +declare @llvm.experimental.vector.reverse.nxv4i8() +declare @llvm.experimental.vector.reverse.nxv8i8() +declare @llvm.experimental.vector.reverse.nxv16i8() +declare @llvm.experimental.vector.reverse.nxv32i8() +declare @llvm.experimental.vector.reverse.nxv64i8() +declare @llvm.experimental.vector.reverse.nxv1i16() +declare @llvm.experimental.vector.reverse.nxv2i16() +declare 
<vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16>) +declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>) +declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>) +declare <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16>) +declare <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32>) +declare <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32>) +declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>) +declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>) +declare <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32>) +declare <vscale x 1 x i64> @llvm.experimental.vector.reverse.nxv1i64(<vscale x 1 x i64>) +declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>) +declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>) +declare <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64>) +declare <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half>) +declare <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half>) +declare <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half>) +declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>) +declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>) +declare <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half>) +declare <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float>) +declare <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float>) +declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>) +declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>) +declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>) +declare <vscale x 1 x double> @llvm.experimental.vector.reverse.nxv1f64(<vscale x 1 x double>) +declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>) +declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>) +declare <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double>) diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -714,6 +714,15 @@ return Changed; } +namespace { +struct TypeSizeComparator { + bool operator()(const TypeSize &LHS, const TypeSize &RHS) const { + return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) < + std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue()); + } +}; +} // end anonymous namespace + /// 1. Ensure that for each type T in A, there exists a type U in B, /// such that T and U have equal size in bits. /// 2. Ensure that for each type U in B, there exists a type T in A @@ -728,14 +737,16 @@ if (B.empty()) Changed |= EnforceAny(B); - auto NoSize = [](const SmallSet<unsigned, 2> &Sizes, MVT T) -> bool { + typedef SmallSet<TypeSize, 2, TypeSizeComparator> TypeSizeSet; + + auto NoSize = [](const TypeSizeSet &Sizes, MVT T) -> bool { return !Sizes.count(T.getSizeInBits()); }; for (unsigned M : union_modes(A, B)) { TypeSetByHwMode::SetType &AS = A.get(M); TypeSetByHwMode::SetType &BS = B.get(M); - SmallSet<unsigned, 2> AN, BN; + TypeSizeSet AN, BN; for (MVT T : AS) AN.insert(T.getSizeInBits());
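
Reviewer note, not part of the patch: the sketch below is a small standalone C++ model of the two pieces of arithmetic the change relies on — the reversed-index computation in lowerVECTOR_REVERSE (vid.v, then vrsub.vx against VLMAX-1, then a gather) and the "emul = lmul * 16 / sew" rule the vrgatherei16 patterns use to pick the i16 index register class. The function names (reverseViaGather, indexOctupleEMul) are invented for illustration only.

// Standalone illustration only -- not part of the patch.
#include <cstdint>
#include <cstdio>
#include <vector>

// indices[i] = (VLMAX - 1) - i; result[i] = src[indices[i]].
static std::vector<uint8_t> reverseViaGather(const std::vector<uint8_t> &Src) {
  uint64_t VLMax = Src.size();
  std::vector<uint8_t> Dst(VLMax);
  for (uint64_t I = 0; I < VLMax; ++I)
    Dst[I] = Src[(VLMax - 1) - I]; // models vrgather.vv / vrgatherei16.vv
  return Dst;
}

// EMUL of the i16 index operand, expressed in eighths ("octuple") the way the
// TableGen code computes it. Returns 0 when the result is outside the legal
// MF8..M8 range (octuple 1..64).
static unsigned indexOctupleEMul(unsigned OctupleLMul, unsigned SEW) {
  unsigned OctupleEMul = (OctupleLMul * 16) / SEW; // !srl(!mul(lmul8, 16), log2(sew))
  return (OctupleEMul >= 1 && OctupleEMul <= 64) ? OctupleEMul : 0;
}

int main() {
  std::vector<uint8_t> Src = {1, 2, 3, 4, 5, 6, 7, 8};
  for (uint8_t V : reverseViaGather(Src))
    printf("%u ", unsigned(V)); // prints 8 7 6 5 4 3 2 1
  printf("\n");

  // SEW=8, LMUL=8 (octuple 64): 64*16/8 = 128 > 64, so there is no legal index
  // EMUL -- which is why the lowering splits LMUL=8 SEW=8 vectors before using
  // vrgatherei16.vv.
  printf("SEW=8 LMUL=8 -> octuple index EMUL %u\n", indexOctupleEMul(64, 8));
  // SEW=8, LMUL=4 (octuple 32): 32*16/8 = 64, i.e. index EMUL = 8, still legal.
  printf("SEW=8 LMUL=4 -> octuple index EMUL %u\n", indexOctupleEMul(32, 8));
  return 0;
}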