diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -194,6 +194,10 @@
   // Matches the semantics of vrgather.vx with an extra operand for VL.
   VRGATHER_VX_VL,
 
+  // Vector sign/zero extend with additional mask & VL operands.
+  VSEXT_VL,
+  VZEXT_VL,
+
   // Memory opcodes start here.
   VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
   VSE_VL,
@@ -433,6 +437,8 @@
                                SelectionDAG &DAG) const;
   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
                             bool HasMask = true) const;
+  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
+                                            unsigned ExtendOpc) const;
 
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1189,9 +1189,15 @@
   }
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
-    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
+    if (Op.getOperand(0).getValueType().isVector() &&
+        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
+    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
   case ISD::SIGN_EXTEND:
-    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
+    if (Op.getOperand(0).getValueType().isVector() &&
+        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+      return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
+    return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
   case ISD::SPLAT_VECTOR:
     return lowerSPLATVECTOR(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
@@ -1865,9 +1871,8 @@
   MVT VecVT = Op.getSimpleValueType();
   SDValue Src = Op.getOperand(0);
   // Only custom-lower extensions from mask types
-  if (!Src.getValueType().isVector() ||
-      Src.getValueType().getVectorElementType() != MVT::i1)
-    return Op;
+  assert(Src.getValueType().isVector() &&
+         Src.getValueType().getVectorElementType() == MVT::i1);
 
   MVT XLenVT = Subtarget.getXLenVT();
   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
@@ -1912,6 +1917,32 @@
   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
 }
 
+SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
+    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
+  MVT ExtVT = Op.getSimpleValueType();
+  // Only custom-lower extensions from fixed-length vector types.
+  if (!ExtVT.isFixedLengthVector())
+    return Op;
+  MVT VT = Op.getOperand(0).getSimpleValueType();
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  // Get the extended container type manually to ensure the same number of
+  // vector elements between source and dest.
+  MVT ContainerExtVT = MVT::getVectorVT(ExtVT.getVectorElementType(),
+                                        ContainerVT.getVectorElementCount());
+
+  SDValue Op1 =
+      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
+
+  SDLoc DL(Op);
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
+
+  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
+}
+
 // Custom-lower truncations from vectors to mask vectors by using a mask and a
 // setcc operation:
 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
@@ -5321,6 +5352,8 @@
   NODE_NAME_CASE(VMCLR_VL)
   NODE_NAME_CASE(VMSET_VL)
   NODE_NAME_CASE(VRGATHER_VX_VL)
+  NODE_NAME_CASE(VSEXT_VL)
+  NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VLE_VL)
   NODE_NAME_CASE(VSE_VL)
   }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -140,6 +140,14 @@
 def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
                              (riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
 
+def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>,
+                                               SDTCisSameNumEltsAs<0, 1>,
+                                               SDTCisSameNumEltsAs<1, 2>,
+                                               SDTCVecEltisVT<2, i1>,
+                                               SDTCisVT<3, XLenVT>]>;
+def riscv_sext_vl : SDNode<"RISCVISD::VSEXT_VL", SDT_RISCVVEXTEND_VL>;
+def riscv_zext_vl : SDNode<"RISCVISD::VZEXT_VL", SDT_RISCVVEXTEND_VL>;
+
 // Ignore the vl operand.
 def SplatFPOp : PatFrag<(ops node:$op),
                         (riscv_vfmv_v_f_vl node:$op, srcvalue)>;
@@ -352,6 +360,18 @@
   }
 }
 
+multiclass VPatExtendSDNode_V_VL<SDNode vop, string inst_name, string suffix,
+                                 list<VTypeInfoToFraction> fraction_list> {
+  foreach vtiTofti = fraction_list in {
+    defvar vti = vtiTofti.Vti;
+    defvar fti = vtiTofti.Fti;
+    def : Pat<(vti.Vector (vop (fti.Vector fti.RegClass:$rs2),
+                               true_mask, (XLenVT (VLOp GPR:$vl)))),
+              (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
+                  fti.RegClass:$rs2, GPR:$vl, vti.SEW)>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -399,6 +419,20 @@
                      vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.SEW)>;
 }
 
+// 12.3. Vector Integer Extension
+defm "" : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF2",
+                                AllFractionableVF2IntVectors>;
+defm "" : VPatExtendSDNode_V_VL<riscv_sext_vl, "PseudoVSEXT", "VF2",
+                                AllFractionableVF2IntVectors>;
+defm "" : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF4",
+                                AllFractionableVF4IntVectors>;
+defm "" : VPatExtendSDNode_V_VL<riscv_sext_vl, "PseudoVSEXT", "VF4",
+                                AllFractionableVF4IntVectors>;
+defm "" : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF8",
+                                AllFractionableVF8IntVectors>;
+defm "" : VPatExtendSDNode_V_VL<riscv_sext_vl, "PseudoVSEXT", "VF8",
+                                AllFractionableVF8IntVectors>;
+
 // 12.5. Vector Bitwise Logical Instructions
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_and_vl, "PseudoVAND">;
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_or_vl, "PseudoVOR">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+define void @sext_v4i8_v4i32(<4 x i8>* %x, <4 x i32>* %z) {
+; CHECK-LABEL: sext_v4i8_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a3, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, a2, e32,m4,ta,mu
+; CHECK-NEXT:    vsext.vf4 v28, v25
+; CHECK-NEXT:    vsetvli a0, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vse32.v v28, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, <4 x i8>* %x
+  %b = sext <4 x i8> %a to <4 x i32>
+  store <4 x i32> %b, <4 x i32>* %z
+  ret void
+}
+
+define void @zext_v4i8_v4i32(<4 x i8>* %x, <4 x i32>* %z) {
+; CHECK-LABEL: zext_v4i8_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 4
+; CHECK-NEXT:    vsetvli a3, a2, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, a2, e32,m4,ta,mu
+; CHECK-NEXT:    vzext.vf4 v28, v25
+; CHECK-NEXT:    vsetvli a0, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vse32.v v28, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, <4 x i8>* %x
+  %b = zext <4 x i8> %a to <4 x i32>
+  store <4 x i32> %b, <4 x i32>* %z
+  ret void
+}
+
+define void @sext_v8i8_v8i32(<8 x i8>* %x, <8 x i32>* %z) {
+; LMULMAX8-LABEL: sext_v8i8_v8i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a2, zero, 8
+; LMULMAX8-NEXT:    vsetvli a3, a2, e8,m1,ta,mu
+; LMULMAX8-NEXT:    vle8.v v25, (a0)
+; LMULMAX8-NEXT:    vsetvli a0, a2, e32,m4,ta,mu
+; LMULMAX8-NEXT:    vsext.vf4 v28, v25
+; LMULMAX8-NEXT:    vsetvli a0, a2, e32,m2,ta,mu
+; LMULMAX8-NEXT:    vse32.v v28, (a1)
+; LMULMAX8-NEXT:    ret
+;
+; LMULMAX2-LABEL: sext_v8i8_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 8
+; LMULMAX2-NEXT:    vsetvli a3, a2, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vle8.v v25, (a0)
+; LMULMAX2-NEXT:    vsetvli a0, a2, e32,m4,ta,mu
+; LMULMAX2-NEXT:    vsext.vf4 v28, v25
+; LMULMAX2-NEXT:    vsetvli a0, a2, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vse32.v v28, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: sext_v8i8_v8i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a2, zero, 8
+; LMULMAX1-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle8.v v25, (a0)
+; LMULMAX1-NEXT:    addi a0, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v28, v25
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v8, v25
+; LMULMAX1-NEXT:    addi a2, a1, 16
+; LMULMAX1-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v8, (a2)
+; LMULMAX1-NEXT:    vse32.v v28, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = sext <8 x i8> %a to <8 x i32>
+  store <8 x i32> %b, <8 x i32>* %z
+  ret void
+}
+
+define void @sext_v32i8_v32i32(<32 x i8>* %x, <32 x i32>* %z) {
+; LMULMAX8-LABEL: sext_v32i8_v32i32:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a2, zero, 32
+; LMULMAX8-NEXT:    vsetvli a3, a2, e8,m2,ta,mu
+; LMULMAX8-NEXT:    vle8.v v26, (a0)
+; LMULMAX8-NEXT:    vsetvli a0, a2, e32,m8,ta,mu
+; LMULMAX8-NEXT:    vsext.vf4 v8, v26
+; LMULMAX8-NEXT:    vse32.v v8, (a1)
+; LMULMAX8-NEXT:    ret
+;
+; LMULMAX2-LABEL: sext_v32i8_v32i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle8.v v26, (a0)
+; LMULMAX2-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v25, v27, 8
+; LMULMAX2-NEXT:    addi a0, zero, 8
+; LMULMAX2-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX2-NEXT:    vsext.vf4 v28, v25
+; LMULMAX2-NEXT:    vsext.vf4 v8, v26
+; LMULMAX2-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v26, v26, 8
+; LMULMAX2-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX2-NEXT:    vsext.vf4 v12, v26
+; LMULMAX2-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v25, v25, 8
+; LMULMAX2-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX2-NEXT:    vsext.vf4 v16, v25
+; LMULMAX2-NEXT:    addi a2, a1, 96
+; LMULMAX2-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vse32.v v16, (a2)
+; LMULMAX2-NEXT:    addi a0, a1, 32
+; LMULMAX2-NEXT:    vse32.v v12, (a0)
+; LMULMAX2-NEXT:    vse32.v v8, (a1)
+; LMULMAX2-NEXT:    addi a0, a1, 64
+; LMULMAX2-NEXT:    vse32.v v28, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: sext_v32i8_v32i32:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 0
+; LMULMAX1-NEXT:    csrr a2, vlenb
+; LMULMAX1-NEXT:    slli a2, a2, 2
+; LMULMAX1-NEXT:    sub sp, sp, a2
+; LMULMAX1-NEXT:    addi a2, zero, 16
+; LMULMAX1-NEXT:    vsetvli a2, a2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    addi a2, a0, 16
+; LMULMAX1-NEXT:    vle8.v v25, (a2)
+; LMULMAX1-NEXT:    vle8.v v26, (a0)
+; LMULMAX1-NEXT:    addi a0, zero, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v28, v25
+; LMULMAX1-NEXT:    vs4r.v v28, (sp) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v27, v25, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v8, v27
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v0, v25, 8
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v12, v0
+; LMULMAX1-NEXT:    vsext.vf4 v16, v26
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v26, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v20, v25
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v1, v26, 8
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v24, v1
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v0, v0, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v4, v0
+; LMULMAX1-NEXT:    vsetvli a2, zero, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v0, v1, 4
+; LMULMAX1-NEXT:    vsetvli a2, a0, e32,m4,ta,mu
+; LMULMAX1-NEXT:    vsext.vf4 v28, v0
+; LMULMAX1-NEXT:    addi a2, a1, 48
+; LMULMAX1-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v28, (a2)
+; LMULMAX1-NEXT:    addi a0, a1, 112
+; LMULMAX1-NEXT:    vse32.v v4, (a0)
+; LMULMAX1-NEXT:    addi a0, a1, 32
+; LMULMAX1-NEXT:    vse32.v v24, (a0)
+; LMULMAX1-NEXT:    addi a0, a1, 16
+; LMULMAX1-NEXT:    vse32.v v20, (a0)
+; LMULMAX1-NEXT:    vse32.v v16, (a1)
+; LMULMAX1-NEXT:    addi a0, a1, 96
+; LMULMAX1-NEXT:    vse32.v v12, (a0)
+; LMULMAX1-NEXT:    addi a0, a1, 80
+; LMULMAX1-NEXT:    vse32.v v8, (a0)
+; LMULMAX1-NEXT:    addi a0, a1, 64
+; LMULMAX1-NEXT:    vl4re8.v v28, (sp) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    vse32.v v28, (a0)
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 2
+; LMULMAX1-NEXT:    add sp, sp, a0
+; LMULMAX1-NEXT:    ret
+  %a = load <32 x i8>, <32 x i8>* %x
+  %b = sext <32 x i8> %a to <32 x i32>
+  store <32 x i32> %b, <32 x i32>* %z
+  ret void
+}