diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -199,8 +199,10 @@ VMCLR_VL, VMSET_VL, - // Matches the semantics of vrgather.vx with an extra operand for VL. + // Matches the semantics of vrgather.vx and vrgather.vv with an extra operand + // for VL. VRGATHER_VX_VL, + VRGATHER_VV_VL, // Vector sign/zero extend with additional mask & VL operands. VSEXT_VL, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -467,6 +467,8 @@ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + + setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); } // Expand various CCs to best match the RVV ISA, which natively supports UNE @@ -504,6 +506,8 @@ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + + setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); }; if (Subtarget.hasStdExtZfh()) @@ -1484,6 +1488,43 @@ return lowerINSERT_SUBVECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return lowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::VECTOR_REVERSE: { + // Implement vector_reverse using vrgather.vv with indices determined by + // subtracting the id of each element from (VLMAX-1). This will convert + // the indices like so: + // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). + // FIXME: This implementation doesn't work for vectors of more than 256 + // elements for SEW==8. + SDLoc DL(Op); + MVT VecVT = Op.getSimpleValueType(); + MVT IntVT = VecVT.changeVectorElementTypeToInteger(); + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Mask, VL; + std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); + + // Calculate VLMAX-1 for the desired SEW. + unsigned MinElts = VecVT.getVectorMinNumElements(); + SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, + DAG.getConstant(MinElts, DL, XLenVT)); + SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, + DAG.getConstant(1, DL, XLenVT)); + + // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 
+    bool IsRV32E64 =
+        !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
+    SDValue SplatVL;
+    if (!IsRV32E64)
+      SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
+    else
+      SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
+
+    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
+    SDValue Indices =
+        DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
+
+    return DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, VecVT, Op.getOperand(0),
+                       Indices, Mask, VL);
+  }
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE:
@@ -5629,6 +5670,7 @@
   NODE_NAME_CASE(VMCLR_VL)
   NODE_NAME_CASE(VMSET_VL)
   NODE_NAME_CASE(VRGATHER_VX_VL)
+  NODE_NAME_CASE(VRGATHER_VV_VL)
   NODE_NAME_CASE(VSEXT_VL)
   NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VLE_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -139,6 +139,15 @@
                                                       SDTCVecEltisVT<3, i1>,
                                                       SDTCisSameNumEltsAs<0, 3>,
                                                       SDTCisVT<4, XLenVT>]>>;
+def riscv_vrgather_vv_vl : SDNode<"RISCVISD::VRGATHER_VV_VL",
+                                  SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                                       SDTCisSameAs<0, 1>,
+                                                       SDTCisInt<2>,
+                                                       SDTCisSameNumEltsAs<0, 2>,
+                                                       SDTCisSameSizeAs<0, 2>,
+                                                       SDTCVecEltisVT<3, i1>,
+                                                       SDTCisSameNumEltsAs<0, 3>,
+                                                       SDTCisVT<4, XLenVT>]>>;
 
 def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL",
                               SDTypeProfile<1, 4, [SDTCisVec<0>,
@@ -945,6 +954,12 @@
 let Predicates = [HasStdExtV] in {
 
 foreach vti = AllIntegerVectors in {
+  def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
+                                              (vti.Vector vti.RegClass:$rs1),
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.SEW)>;
   def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
                                               (vti.Mask true_mask),
                                               (XLenVT (VLOp GPR:$vl)))),
@@ -962,6 +977,13 @@
 let Predicates = [HasStdExtV, HasStdExtF] in {
 
 foreach vti = AllFloatVectors in {
+  defvar ivti = GetIntVTypeInfo<vti>.Vti;
+  def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
+                                              (ivti.Vector vti.RegClass:$rs1),
+                                              (vti.Mask true_mask),
+                                              (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX)
+                 vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.SEW)>;
   def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
                                               (vti.Mask true_mask),
                                               (XLenVT (VLOp GPR:$vl)))),
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -0,0 +1,676 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+experimental-zfh -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK %s
+
+;
+; VECTOR_REVERSE - masks
+;
+; FIXME: Support for mask vectors
+
+;define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
+;  %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
+;  ret <vscale x 2 x i1> %res
+;}
+;
+;define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
+;  %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
+;  ret <vscale x 4 x i1> %res
+;}
+;
+;define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
+;  %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
+;  ret <vscale x 8 x i1> %res
+;}
+;
+;define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
+;  %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
+;  ret <vscale x 16 x i1> %res
+;}
+;
+;define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
+;  %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
+;  ret <vscale x 32 x i1> %res
+;}
+;
+;define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
+;  %res = call <vscale x 64 x i1> @llvm.experimental.vector.reverse.nxv64i1(<vscale x 64 x i1> %a)
+;  ret <vscale x 64 x i1> %res
+;}
+
+
+;
+; VECTOR_REVERSE - integer
+;
+
+define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
+; CHECK-LABEL: reverse_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i8> @llvm.experimental.vector.reverse.nxv1i8(<vscale x 1 x i8> %a)
+  ret <vscale x 1 x i8> %res
+}
+
+define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
+; CHECK-LABEL: reverse_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a)
+  ret <vscale x 2 x i8> %res
+}
+
+define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
+; CHECK-LABEL: reverse_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i8> @llvm.experimental.vector.reverse.nxv4i8(<vscale x 4 x i8> %a)
+  ret <vscale x 4 x i8> %res
+}
+
+define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
+; CHECK-LABEL: reverse_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i8> @llvm.experimental.vector.reverse.nxv8i8(<vscale x 8 x i8> %a)
+  ret <vscale x 8 x i8> %res
+}
+
+define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: reverse_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
+; CHECK-LABEL: reverse_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> %a)
+  ret <vscale x 32 x i8> %res
+}
+
+define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
+; CHECK-LABEL: reverse_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 64 x i8> @llvm.experimental.vector.reverse.nxv64i8(<vscale x 64 x i8> %a)
+  ret <vscale x 64 x i8> %res
+}
+
+define <vscale x 1 x i16> @reverse_nxv1i16(<vscale x 1 x i16> %a) {
+; CHECK-LABEL: reverse_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16> %a)
+  ret <vscale x 1 x i16> %res
+}
+
+define <vscale x 2 x i16> @reverse_nxv2i16(<vscale x 2 x i16> %a) {
+; CHECK-LABEL: reverse_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> %a)
+  ret <vscale x 2 x i16> %res
+}
+
+define <vscale x 4 x i16> @reverse_nxv4i16(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: reverse_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> %a)
+  ret <vscale x 4 x i16> %res
+}
+
+define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: reverse_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 16 x i16> @reverse_nxv16i16(<vscale x 16 x i16> %a) {
+; CHECK-LABEL: reverse_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> %a)
+  ret <vscale x 16 x i16> %res
+}
+
+define <vscale x 32 x i16> @reverse_nxv32i16(<vscale x 32 x i16> %a) {
+; CHECK-LABEL: reverse_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16> %a)
+  ret <vscale x 32 x i16> %res
+}
+
+define <vscale x 1 x i32> @reverse_nxv1i32(<vscale x 1 x i32> %a) {
+; CHECK-LABEL: reverse_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32> %a)
+  ret <vscale x 1 x i32> %res
+}
+
+define <vscale x 2 x i32> @reverse_nxv2i32(<vscale x 2 x i32> %a) {
+; CHECK-LABEL: reverse_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32> %a)
+  ret <vscale x 2 x i32> %res
+}
+
+define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: reverse_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) {
+; CHECK-LABEL: reverse_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
+  ret <vscale x 8 x i32> %res
+}
+
+define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) {
+; CHECK-LABEL: reverse_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32> %a)
+  ret <vscale x 16 x i32> %res
+}
+
+define <vscale x 1 x i64> @reverse_nxv1i64(<vscale x 1 x i64> %a) {
+; CHECK-LABEL: reverse_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i64> @llvm.experimental.vector.reverse.nxv1i64(<vscale x 1 x i64> %a)
+  ret <vscale x 1 x i64> %res
+}
+
+define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: reverse_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 4 x i64> @reverse_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: reverse_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> %a)
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: reverse_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> %a)
+  ret <vscale x 8 x i64> %res
+}
+
+;
+; VECTOR_REVERSE - floating point
+;
+
+define <vscale x 1 x half> @reverse_nxv1f16(<vscale x 1 x half> %a) {
+; CHECK-LABEL: reverse_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half> %a)
+  ret <vscale x 1 x half> %res
+}
+
+define <vscale x 2 x half> @reverse_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: reverse_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> %a)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x half> @reverse_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: reverse_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> %a)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: reverse_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 16 x half> @reverse_nxv16f16(<vscale x 16 x half> %a) {
+; CHECK-LABEL: reverse_nxv16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> %a)
+  ret <vscale x 16 x half> %res
+}
+
+define <vscale x 32 x half> @reverse_nxv32f16(<vscale x 32 x half> %a) {
+; CHECK-LABEL: reverse_nxv32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half> %a)
+  ret <vscale x 32 x half> %res
+}
+
+define <vscale x 1 x float> @reverse_nxv1f32(<vscale x 1 x float> %a) {
+; CHECK-LABEL: reverse_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float> %a)
+  ret <vscale x 1 x float> %res
+}
+
+define <vscale x 2 x float> @reverse_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: reverse_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: reverse_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) {
+; CHECK-LABEL: reverse_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> %a)
+  ret <vscale x 8 x float> %res
+}
+
+define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) {
+; CHECK-LABEL: reverse_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
+  ret <vscale x 16 x float> %res
+}
+
+define <vscale x 1 x double> @reverse_nxv1f64(<vscale x 1 x double> %a) {
+; CHECK-LABEL: reverse_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vrsub.vx v26, v25, a0
+; CHECK-NEXT:    vrgather.vv v25, v8, v26
+; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x double> @llvm.experimental.vector.reverse.nxv1f64(<vscale x 1 x double> %a)
+  ret <vscale x 1 x double> %res
+}
+
+define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: reverse_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vid.v v26
+; CHECK-NEXT:    vrsub.vx v28, v26, a0
+; CHECK-NEXT:    vrgather.vv v26, v8, v28
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 4 x double> @reverse_nxv4f64(<vscale x 4 x double> %a) {
+; CHECK-LABEL: reverse_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vrsub.vx v12, v28, a0
+; CHECK-NEXT:    vrgather.vv v28, v8, v12
+; CHECK-NEXT:    vmv4r.v v8, v28
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> %a)
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 8 x double> @reverse_nxv8f64(<vscale x 8 x double> %a) {
+; CHECK-LABEL: reverse_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a0
+; CHECK-NEXT:    vrgather.vv v16, v8, v24
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double> %a)
+  ret <vscale x 8 x double> %res
+}
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
+declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>)
+declare <vscale x 64 x i1> @llvm.experimental.vector.reverse.nxv64i1(<vscale x 64 x i1>)
+declare <vscale x 1 x i8> @llvm.experimental.vector.reverse.nxv1i8(<vscale x 1 x i8>)
+declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.experimental.vector.reverse.nxv4i8(<vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.experimental.vector.reverse.nxv8i8(<vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
+declare <vscale x 64 x i8> @llvm.experimental.vector.reverse.nxv64i8(<vscale x 64 x i8>)
+declare <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16>)
+declare <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
+declare <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16>)
+declare <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32>)
+declare <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32>)
+declare <vscale x 1 x i64> @llvm.experimental.vector.reverse.nxv1i64(<vscale x 1 x i64>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64>)
+declare <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half>)
+declare <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half>)
+declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
+declare <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half>)
+declare <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float>)
+declare <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
+declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>)
+declare <vscale x 1 x double> @llvm.experimental.vector.reverse.nxv1f64(<vscale x 1 x double>)
+declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
+declare <vscale x 8 x double> @llvm.experimental.vector.reverse.nxv8f64(<vscale x 8 x double>)
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -728,19 +728,26 @@
   if (B.empty())
     Changed |= EnforceAny(B);
 
-  auto NoSize = [](const SmallSet<unsigned, 2> &Sizes, MVT T) -> bool {
-    return !Sizes.count(T.getSizeInBits());
+  auto NoSize =
+      [](const SmallSet<std::pair<bool, unsigned>, 2> &Sizes,
+         MVT T) -> bool {
+    TypeSize S = T.getSizeInBits();
+    return !Sizes.count(std::make_pair(S.isScalable(), S.getKnownMinValue()));
   };
 
   for (unsigned M : union_modes(A, B)) {
     TypeSetByHwMode::SetType &AS = A.get(M);
     TypeSetByHwMode::SetType &BS = B.get(M);
-    SmallSet<unsigned, 2> AN, BN;
+    SmallSet<std::pair<bool, unsigned>, 2> AN, BN;
 
-    for (MVT T : AS)
-      AN.insert(T.getSizeInBits());
-    for (MVT T : BS)
-      BN.insert(T.getSizeInBits());
+    for (MVT T : AS) {
+      TypeSize S = T.getSizeInBits();
+      AN.insert(std::make_pair(S.isScalable(), S.getKnownMinValue()));
+    }
+    for (MVT T : BS) {
+      TypeSize S = T.getSizeInBits();
+      BN.insert(std::make_pair(S.isScalable(), S.getKnownMinValue()));
+    }
 
     Changed |= berase_if(AS, std::bind(NoSize, BN, std::placeholders::_1));
     Changed |= berase_if(BS, std::bind(NoSize, AN, std::placeholders::_1));
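Note (not part of the patch): a minimal standalone C++ sketch of the index computation the new lowering relies on, i.e. indices = splat(VLMAX-1) - vid followed by a vrgather.vv-style gather, which is what the vid.v/vrsub.vx/vrgather.vv sequences in the test CHECK lines verify. The function name and the use of std::vector are illustrative assumptions only, not LLVM code.

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of the lowering: lane i of the result reads lane
// (VLMAX - 1) - i of the source, mirroring vid.v + vrsub.vx + vrgather.vv.
std::vector<int8_t> reverseByGather(const std::vector<int8_t> &Src) {
  size_t VLMax = Src.size(); // stands in for the runtime VLMAX of the type
  std::vector<int8_t> Dst(VLMax);
  for (size_t I = 0; I < VLMax; ++I) {
    size_t Index = (VLMax - 1) - I; // splat(VLMAX-1) - vid
    Dst[I] = Src[Index];            // vrgather.vv element semantics
  }
  return Dst;
}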