diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -41,6 +41,12 @@
 STATISTIC(NumTailCalls, "Number of tail calls");
 
+static cl::opt<bool>
+    RVVFixedLen("riscv-fixed-len-rvv",
+                cl::desc("Enable ISel support for fixed-length vectors in "
+                         "conjunction with +experimental-v"),
+                cl::init(false), cl::Hidden);
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
 
@@ -134,6 +140,11 @@
     addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
     addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
     addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
+
+    if (RVVFixedLen) {
+      addRegisterClass(MVT::v2i32, &RISCV::VRRegClass);
+      addRegisterClass(MVT::v4i32, &RISCV::VRM2RegClass);
+    }
   }
 
   // Compute derived properties from the register classes.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -46,6 +46,8 @@
 
 // Output pattern for X0 used to represent VLMAX in the pseudo instructions.
 def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
+// Output pattern for an immediate AVL
+def VLImm : OutPatFrag<(ops node:$node), (ADDI X0, node:$node)>;
 
 // List of EEW.
 defvar EEWList = [8, 16, 32, 64];
@@ -54,18 +56,22 @@
 // Vector register and vector group type information.
 //===----------------------------------------------------------------------===//
 
-class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M>
+class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
+                int NElts = -1>
 {
   ValueType Vector = Vec;
   ValueType Mask = Mas;
   int SEW = Sew;
   VReg RegClass = Reg;
   LMULInfo LMul = M;
+  int NumElts = NElts;
+  OutPatFrag AVL = !if(!eq(NumElts, -1),
+                       VLMax, OutPatFrag<(ops), (ADDI X0, NumElts)>);
 }
 
 class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas,
-                     int Sew, VReg Reg, LMULInfo M>
-  : VTypeInfo<Vec, Mas, Sew, Reg, M>
+                     int Sew, VReg Reg, LMULInfo M, int NElts=-1>
+  : VTypeInfo<Vec, Mas, Sew, Reg, M, NElts>
 {
   ValueType VectorM1 = VecM1;
 }
@@ -98,6 +104,10 @@
   def : GroupVTypeInfo;
   def : GroupVTypeInfo;
   def : GroupVTypeInfo;
+
+  // Non-scalable vector types which assume VLEN >= 64
+  def VtypeV2I32 : VTypeInfo<v2i32, v2i1, 32, VR,   V_M1, 2>;
+  def VtypeV4I32 : VTypeInfo<v4i32, v4i1, 32, VRM2, V_M2, 4>;
 }
 }
 
@@ -165,6 +175,7 @@
                           ValueType result_type,
                           ValueType op_type,
                           ValueType mask_type,
+                          OutPatFrag avl_pat,
                           int sew,
                           LMULInfo vlmul,
                           VReg result_reg_class,
@@ -179,7 +190,7 @@
                           op_reg_class:$rs1,
                           op_reg_class:$rs2,
                           (mask_type zero_reg),
-                          VLMax, sew)>;
+                          avl_pat, sew)>;
 }
 
 multiclass pat_vop_binary_common<SDNode vop,
@@ -188,8 +199,8 @@
 {
   foreach vti = vtilist in
     defm : pat_vop_binary<vop, instruction_name,
-                          vti.Vector, vti.Vector, vti.Mask,
-                          vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+                          vti.Vector, vti.Vector, vti.Mask, vti.AVL,
+                          vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -256,6 +267,7 @@
 // Patterns.
 multiclass pat_load_store<ValueType type,
                           ValueType mask_type,
+                          OutPatFrag avl_pat,
                           int sew,
                           LMULInfo vlmul,
                           VReg reg_class>
@@ -267,27 +279,27 @@
             (load_instr (type (IMPLICIT_DEF)),
                         GPR:$rs1,
                         (mask_type zero_reg),
-                        VLMax, sew)>;
+                        avl_pat, sew)>;
   def : Pat<(type (load AddrFI:$rs1)),
             (load_instr (type (IMPLICIT_DEF)),
                         AddrFI:$rs1,
                         (mask_type zero_reg),
-                        VLMax, sew)>;
+                        avl_pat, sew)>;
 
   // Store
   def : Pat<(store type:$rs2, GPR:$rs1),
             (store_instr reg_class:$rs2,
                          GPR:$rs1,
                          (mask_type zero_reg),
-                         VLMax, sew)>;
+                         avl_pat, sew)>;
   def : Pat<(store type:$rs2, AddrFI:$rs1),
             (store_instr reg_class:$rs2,
                          AddrFI:$rs1,
                          (mask_type zero_reg),
-                         VLMax, sew)>;
+                         avl_pat, sew)>;
 }
 foreach vti = AllVectors in
 {
-  defm : pat_load_store<vti.Vector, vti.Mask,
-                        vti.SEW, vti.LMul, vti.RegClass>;
+  defm : pat_load_store<vti.Vector, vti.Mask, vti.AVL,
+                        vti.SEW, vti.LMul, vti.RegClass>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -393,7 +393,7 @@
                    vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
                    vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t,
                    vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
-                   vbool2_t, vbool1_t],
+                   vbool2_t, vbool1_t, v2i32, v2i1, v4i1],
               (add (sequence "V%u", 25, 31),
                    (sequence "V%u", 8, 24),
                    (sequence "V%u", 0, 7)), 1>;
@@ -409,7 +409,7 @@
                     (sequence "V%u", 1, 7)), 1>;
 
 def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
-                 vfloat32m2_t, vfloat64m2_t],
+                 vfloat32m2_t, vfloat64m2_t, v4i32],
                 (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2,
                      V16M2, V18M2, V20M2, V22M2, V24M2, V0M2, V2M2,
                      V4M2, V6M2), 2>;
@@ -422,7 +422,7 @@
                 (add V8M8, V16M8, V24M8, V0M8), 8>;
 
 defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
-                   vbool4_t, vbool2_t, vbool1_t];
+                   vbool4_t, vbool2_t, vbool1_t, v2i1, v4i1];
 
 def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
   let Size = 64;
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-fixedlen.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-fixedlen.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-fixedlen.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=NO-VECTOR-32
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=NO-VECTOR-64
+; RUN: llc -mtriple riscv32 -mattr=+experimental-v --riscv-fixed-len-rvv %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=VECTOR
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v --riscv-fixed-len-rvv %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=VECTOR
+
+define void @vadd_v2i32(<2 x i32> *%pc, <2 x i32> *%pa, <2 x i32> *%pb) nounwind {
+; NO-VECTOR-32-LABEL: vadd_v2i32:
+; NO-VECTOR-32:       # %bb.0:
+; NO-VECTOR-32-NEXT:    lw a3, 4(a1)
+; NO-VECTOR-32-NEXT:    lw a1, 0(a1)
+; NO-VECTOR-32-NEXT:    lw a4, 0(a2)
+; NO-VECTOR-32-NEXT:    lw a2, 4(a2)
+; NO-VECTOR-32-NEXT:    add a1, a1, a4
+; NO-VECTOR-32-NEXT:    add a2, a3, a2
+; NO-VECTOR-32-NEXT:    sw a2, 4(a0)
+; NO-VECTOR-32-NEXT:    sw a1, 0(a0)
+; NO-VECTOR-32-NEXT:    ret
+;
+; NO-VECTOR-64-LABEL: vadd_v2i32:
+; NO-VECTOR-64:       # %bb.0:
+; NO-VECTOR-64-NEXT:    lw a3, 4(a1)
+; NO-VECTOR-64-NEXT:    lw a1, 0(a1)
+; NO-VECTOR-64-NEXT:    lw a4, 0(a2)
+; NO-VECTOR-64-NEXT:    lw a2, 4(a2)
+; NO-VECTOR-64-NEXT:    add a1, a1, a4
+; NO-VECTOR-64-NEXT:    add a2, a3, a2
+; NO-VECTOR-64-NEXT:    sw a2, 4(a0)
+; NO-VECTOR-64-NEXT:    sw a1, 0(a0)
+; NO-VECTOR-64-NEXT:    ret
+;
+; VECTOR-LABEL: vadd_v2i32:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    addi a3, zero, 2
+; VECTOR-NEXT:    vsetvli a4, a3, e32,m1,tu,mu
+; VECTOR-NEXT:    vle32.v v25, (a1)
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT:    vle32.v v26, (a2)
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT:    vadd.vv v25, v25, v26
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT:    vse32.v v25, (a0)
+; VECTOR-NEXT:    ret
+  %va = load <2 x i32>, <2 x i32>* %pa
+  %vb = load <2 x i32>, <2 x i32>* %pb
+  %vc = add <2 x i32> %va, %vb
+  store <2 x i32> %vc, <2 x i32> *%pc
+  ret void
+}
+
+define void @vadd_v4i32(<4 x i32> *%pc, <4 x i32> *%pa, <4 x i32> *%pb) nounwind {
+; NO-VECTOR-32-LABEL: vadd_v4i32:
+; NO-VECTOR-32:       # %bb.0:
+; NO-VECTOR-32-NEXT:    lw a6, 12(a1)
+; NO-VECTOR-32-NEXT:    lw a7, 8(a1)
+; NO-VECTOR-32-NEXT:    lw t0, 4(a1)
+; NO-VECTOR-32-NEXT:    lw a1, 0(a1)
+; NO-VECTOR-32-NEXT:    lw a3, 0(a2)
+; NO-VECTOR-32-NEXT:    lw a4, 4(a2)
+; NO-VECTOR-32-NEXT:    lw a5, 8(a2)
+; NO-VECTOR-32-NEXT:    lw a2, 12(a2)
+; NO-VECTOR-32-NEXT:    add a1, a1, a3
+; NO-VECTOR-32-NEXT:    add a3, t0, a4
+; NO-VECTOR-32-NEXT:    add a4, a7, a5
+; NO-VECTOR-32-NEXT:    add a2, a6, a2
+; NO-VECTOR-32-NEXT:    sw a2, 12(a0)
+; NO-VECTOR-32-NEXT:    sw a4, 8(a0)
+; NO-VECTOR-32-NEXT:    sw a3, 4(a0)
+; NO-VECTOR-32-NEXT:    sw a1, 0(a0)
+; NO-VECTOR-32-NEXT:    ret
+;
+; NO-VECTOR-64-LABEL: vadd_v4i32:
+; NO-VECTOR-64:       # %bb.0:
+; NO-VECTOR-64-NEXT:    lw a6, 12(a1)
+; NO-VECTOR-64-NEXT:    lw a7, 8(a1)
+; NO-VECTOR-64-NEXT:    lw t0, 4(a1)
+; NO-VECTOR-64-NEXT:    lw a1, 0(a1)
+; NO-VECTOR-64-NEXT:    lw a3, 0(a2)
+; NO-VECTOR-64-NEXT:    lw a4, 4(a2)
+; NO-VECTOR-64-NEXT:    lw a5, 8(a2)
+; NO-VECTOR-64-NEXT:    lw a2, 12(a2)
+; NO-VECTOR-64-NEXT:    add a1, a1, a3
+; NO-VECTOR-64-NEXT:    add a3, t0, a4
+; NO-VECTOR-64-NEXT:    add a4, a7, a5
+; NO-VECTOR-64-NEXT:    add a2, a6, a2
+; NO-VECTOR-64-NEXT:    sw a2, 12(a0)
+; NO-VECTOR-64-NEXT:    sw a4, 8(a0)
+; NO-VECTOR-64-NEXT:    sw a3, 4(a0)
+; NO-VECTOR-64-NEXT:    sw a1, 0(a0)
+; NO-VECTOR-64-NEXT:    ret
+;
+; VECTOR-LABEL: vadd_v4i32:
+; VECTOR:       # %bb.0:
+; VECTOR-NEXT:    addi a3, zero, 4
+; VECTOR-NEXT:    vsetvli a4, a3, e32,m2,tu,mu
+; VECTOR-NEXT:    vle32.v v26, (a1)
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT:    vle32.v v28, (a2)
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT:    vadd.vv v26, v26, v28
+; VECTOR-NEXT:    vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT:    vse32.v v26, (a0)
+; VECTOR-NEXT:    ret
+  %va = load <4 x i32>, <4 x i32>* %pa
+  %vb = load <4 x i32>, <4 x i32>* %pb
+  %vc = add <4 x i32> %va, %vb
+  store <4 x i32> %vc, <4 x i32> *%pc
+  ret void
+}
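
Note (not part of the patch): a sketch of how a further fixed-length type could be described under the same scheme. The def name VtypeV8I32 and the v8i32/v8i1 -> VRM4/V_M4 mapping are illustrative assumptions mirroring VtypeV2I32 and VtypeV4I32 above, not something this diff adds.

// Hypothetical follow-on def: at SEW=32 a v8i32 value occupies four vector
// registers, so it would map to the VRM4 register class with LMUL=4 and,
// like the defs above, assume VLEN >= 64. Making it usable would also need
// v8i32 added to VRM4's type list and v8i1 added to VMaskVTs in
// RISCVRegisterInfo.td, plus an
// addRegisterClass(MVT::v8i32, &RISCV::VRM4RegClass) call guarded by
// RVVFixedLen in RISCVISelLowering.cpp.
def VtypeV8I32 : VTypeInfo<v8i32, v8i1, 32, VRM4, V_M4, 8>;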