diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -41,6 +41,12 @@
 STATISTIC(NumTailCalls, "Number of tail calls");
 
+static cl::opt<bool> RISCVNonScalables(
+    "riscv-non-scalable-vectors",
+    cl::desc("Enable ISel support for non-scalable vectors in "
+             "conjunction with +experimental-v"),
+    cl::init(false), cl::Hidden);
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -132,6 +138,11 @@
     addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
     addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
     addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
+
+    if (RISCVNonScalables) {
+      addRegisterClass(MVT::v4i32, &RISCV::VRRegClass);
+      addRegisterClass(MVT::v8i32, &RISCV::VRM2RegClass);
+    }
   }
 
   // Compute derived properties from the register classes.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoPseudoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoPseudoV.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoPseudoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoPseudoV.td
@@ -46,6 +46,8 @@
 // Output pattern for X0 used to represent VLMAX in the pseudo instructions.
 def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
+// Output pattern for an immediate AVL
+class VLImm<int vl> : OutPatFrag<(ops), (ADDI X0, vl)>;
 
 // List of EEW.
 defvar EEWList = [8, 16, 32, 64];
@@ -54,50 +56,57 @@
 // Vector register and vector group type information.
 //===----------------------------------------------------------------------===//
 
-class VectorTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg,
-                     LMULInfo M>
+class VectorTypeInfo<ValueType Vec, ValueType Mas, int NElts, int Sew,
+                     VReg Reg, LMULInfo M>
 {
   ValueType Vector = Vec;
   ValueType Mask = Mas;
+  int NumElts = NElts;
   int SEW = Sew;
   VReg RegClass = Reg;
   LMULInfo LMul = M;
 }
 
 class GroupVectorTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas,
-                          int Sew, VReg Reg, LMULInfo M>
-  : VectorTypeInfo<Vec, Mas, Sew, Reg, M>
+                          int NElts, int Sew, VReg Reg, LMULInfo M>
+  : VectorTypeInfo<Vec, Mas, NElts, Sew, Reg, M>
 {
   ValueType VectorM1 = VecM1;
 }
 
 defset list<VectorTypeInfo> AllVectors = {
   defset list<VectorTypeInfo> AllIntegerVectors = {
-    def VtypeInt8MF8 : VectorTypeInfo;
-    def VtypeInt8MF4 : VectorTypeInfo;
-    def VtypeInt8MF2 : VectorTypeInfo;
-    def VtypeInt8M1 : VectorTypeInfo;
-    def VtypeInt16MF4 : VectorTypeInfo;
-    def VtypeInt16MF2 : VectorTypeInfo;
-    def VtypeInt16M1 : VectorTypeInfo;
-    def VtypeInt32MF2 : VectorTypeInfo;
-    def VtypeInt32M1 : VectorTypeInfo;
-    def VtypeInt64M1 : VectorTypeInfo;
-
-    def VtypeInt8M2 : GroupVectorTypeInfo;
-    def VtypeInt8M4 : GroupVectorTypeInfo;
-    def VtypeInt8M8 : GroupVectorTypeInfo;
-
-    def VtypeInt16M2 : GroupVectorTypeInfo;
-    def VtypeInt16M4 : GroupVectorTypeInfo;
-    def VtypeInt16M8 : GroupVectorTypeInfo;
-
-    def VtypeInt32M2 : GroupVectorTypeInfo;
-    def VtypeInt32M4 : GroupVectorTypeInfo;
-    def VtypeInt32M8 : GroupVectorTypeInfo;
-
-    def VtypeInt64M2 : GroupVectorTypeInfo;
-    def VtypeInt64M4 : GroupVectorTypeInfo;
-    def VtypeInt64M8 : GroupVectorTypeInfo;
+    def VtypeInt8MF8 : VectorTypeInfo;
+    def VtypeInt8MF4 : VectorTypeInfo;
+    def VtypeInt8MF2 : VectorTypeInfo;
+    def VtypeInt8M1 : VectorTypeInfo;
+    def VtypeInt16MF4 : VectorTypeInfo;
+    def VtypeInt16MF2 : VectorTypeInfo;
+    def VtypeInt16M1 : VectorTypeInfo;
+    def VtypeInt32MF2 : VectorTypeInfo;
+    def VtypeInt32M1 : VectorTypeInfo;
+    def VtypeInt64M1 : VectorTypeInfo;
+
+    def VtypeInt8M2 : GroupVectorTypeInfo;
+    def VtypeInt8M4 : GroupVectorTypeInfo;
+    def VtypeInt8M8 : GroupVectorTypeInfo;
+
+    def VtypeInt16M2 : GroupVectorTypeInfo;
+    def VtypeInt16M4 : GroupVectorTypeInfo;
+    def VtypeInt16M8 : GroupVectorTypeInfo;
+
+    def VtypeInt32M2 : GroupVectorTypeInfo;
+    def VtypeInt32M4 : GroupVectorTypeInfo;
+    def VtypeInt32M8 : GroupVectorTypeInfo;
+
+    def VtypeInt64M2 : GroupVectorTypeInfo;
+    def VtypeInt64M4 : GroupVectorTypeInfo;
+    def VtypeInt64M8 : GroupVectorTypeInfo;
+
+    // Non-scalable vector types which assume VLEN>=128
+    // FIXME: These mask types aren't right
+    def VtypeV4I32 : VectorTypeInfo;
+    def VtypeV8I32 : VectorTypeInfo;
   }
 }
@@ -165,6 +174,7 @@
                          ValueType result_type,
                          ValueType op_type,
                          ValueType mask_type,
+                         OutPatFrag avl_pat,
                          int sew,
                          LMULInfo vlmul,
                          VReg result_reg_class,
@@ -179,7 +189,7 @@
                        op_reg_class:$rs1,
                        op_reg_class:$rs2,
                        (mask_type zero_reg),
-                       VLMax,
+                       avl_pat,
                        sew)>;
 }
@@ -189,8 +199,9 @@
 {
   foreach vti = vtilist in
   {
+    defvar vl = !if(!eq(vti.NumElts, 0), VLMax, VLImm<vti.NumElts>);
     defm : pat_vop_binary;
   }
 }
@@ -259,6 +270,7 @@
 // Patterns.
 multiclass pat_load_store
@@ -270,27 +282,28 @@
             (load_instr (type (IMPLICIT_DEF)), GPR:$rs1,
                         (mask_type zero_reg),
-                        VLMax, sew)>;
+                        avl_pat, sew)>;
   def : Pat<(type (load AddrFI:$rs1)),
             (load_instr (type (IMPLICIT_DEF)), AddrFI:$rs1,
                         (mask_type zero_reg),
-                        VLMax, sew)>;
+                        avl_pat, sew)>;
   // Store
   def : Pat<(store type:$rs2, GPR:$rs1),
             (store_instr reg_class:$rs2, GPR:$rs1,
                          (mask_type zero_reg),
-                         VLMax, sew)>;
+                         avl_pat, sew)>;
   def : Pat<(store type:$rs2, AddrFI:$rs1),
             (store_instr reg_class:$rs2, AddrFI:$rs1,
                          (mask_type zero_reg),
-                         VLMax, sew)>;
+                         avl_pat, sew)>;
 }
 
 foreach vti = AllVectors in {
-  defm : pat_load_store);
+  defm : pat_load_store;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -373,7 +373,7 @@
                     vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
                     vfloat32mf2_t, vfloat32m1_t, vfloat64m1_t,
                     vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
-                    vbool2_t, vbool1_t],
+                    vbool2_t, vbool1_t, v4i32],
                    (add (sequence "V%u", 25, 31),
                         (sequence "V%u", 8, 24),
                         (sequence "V%u", 0, 7)), 1>;
@@ -389,7 +389,7 @@
                         (sequence "V%u", 1, 7)), 1>;
 
 def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
-                 vfloat32m2_t, vfloat64m2_t],
+                 vfloat32m2_t, vfloat64m2_t, v8i32],
                 (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2,
                      V16M2, V18M2, V20M2, V22M2, V24M2, V0M2, V2M2,
                      V4M2, V6M2), 2>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-nonscale.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-nonscale.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-nonscale.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,NO-VECTOR-32
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,NO-VECTOR-64
+; RUN: llc -mtriple riscv32 -mattr=+experimental-v --riscv-non-scalable-vectors %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,VECTOR
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v --riscv-non-scalable-vectors %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,VECTOR
+
+define void @vadd_v4int32(<4 x i32> *%pc, <4 x i32> *%pa, <4 x i32> *%pb) nounwind {
+; NO-VECTOR-32-LABEL: vadd_v4int32:
+; NO-VECTOR-32: # %bb.0:
+; NO-VECTOR-32-NEXT: lw a6, 12(a1)
+; NO-VECTOR-32-NEXT: lw a7, 8(a1)
+; NO-VECTOR-32-NEXT: lw t0, 4(a1)
+; NO-VECTOR-32-NEXT: lw a1, 0(a1)
+; NO-VECTOR-32-NEXT: lw a3, 0(a2)
+; NO-VECTOR-32-NEXT: lw a4, 4(a2)
+; NO-VECTOR-32-NEXT: lw a5, 8(a2)
+; NO-VECTOR-32-NEXT: lw a2, 12(a2)
+; NO-VECTOR-32-NEXT: add a1, a1, a3
+; NO-VECTOR-32-NEXT: add a3, t0, a4
+; NO-VECTOR-32-NEXT: add a4, a7, a5
+; NO-VECTOR-32-NEXT: add a2, a6, a2
+; NO-VECTOR-32-NEXT: sw a2, 12(a0)
+; NO-VECTOR-32-NEXT: sw a4, 8(a0)
+; NO-VECTOR-32-NEXT: sw a3, 4(a0)
+; NO-VECTOR-32-NEXT: sw a1, 0(a0)
+; NO-VECTOR-32-NEXT: ret
+;
+; NO-VECTOR-64-LABEL: vadd_v4int32:
+; NO-VECTOR-64: # %bb.0:
+; NO-VECTOR-64-NEXT: lw a6, 12(a1)
+; NO-VECTOR-64-NEXT: lw a7, 8(a1)
+; NO-VECTOR-64-NEXT: lw t0, 4(a1)
+; NO-VECTOR-64-NEXT: lw a1, 0(a1)
+; NO-VECTOR-64-NEXT: lw a3, 0(a2)
+; NO-VECTOR-64-NEXT: lw a4, 4(a2)
+; NO-VECTOR-64-NEXT: lw a5, 8(a2)
+; NO-VECTOR-64-NEXT: lw a2, 12(a2)
+; NO-VECTOR-64-NEXT: add a1, a1, a3
+; NO-VECTOR-64-NEXT: add a3, t0, a4
+; NO-VECTOR-64-NEXT: add a4, a7, a5
+; NO-VECTOR-64-NEXT: add a2, a6, a2
+; NO-VECTOR-64-NEXT: sw a2, 12(a0)
+; NO-VECTOR-64-NEXT: sw a4, 8(a0)
+; NO-VECTOR-64-NEXT: sw a3, 4(a0)
+; NO-VECTOR-64-NEXT: sw a1, 0(a0)
+; NO-VECTOR-64-NEXT: ret
+;
+; VECTOR-LABEL: vadd_v4int32:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: addi a3, zero, 4
+; VECTOR-NEXT: vsetvli a4, a3, e32,m1,tu,mu
+; VECTOR-NEXT: vle32.v v25, (a1)
+; VECTOR-NEXT: vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT: vle32.v v26, (a2)
+; VECTOR-NEXT: vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT: vadd.vv v25, v25, v26
+; VECTOR-NEXT: vsetvli a1, a3, e32,m1,tu,mu
+; VECTOR-NEXT: vse32.v v25, (a0)
+; VECTOR-NEXT: ret
+  %va = load <4 x i32>, <4 x i32>* %pa
+  %vb = load <4 x i32>, <4 x i32>* %pb
+  %vc = add <4 x i32> %va, %vb
+  store <4 x i32> %vc, <4 x i32> *%pc
+  ret void
+}
+
+define void @vadd_v8int32(<8 x i32> *%pc, <8 x i32> *%pa, <8 x i32> *%pb) nounwind {
+; NO-VECTOR-32-LABEL: vadd_v8int32:
+; NO-VECTOR-32: # %bb.0:
+; NO-VECTOR-32-NEXT: addi sp, sp, -16
+; NO-VECTOR-32-NEXT: sw s0, 12(sp)
+; NO-VECTOR-32-NEXT: sw s1, 8(sp)
+; NO-VECTOR-32-NEXT: sw s2, 4(sp)
+; NO-VECTOR-32-NEXT: lw a6, 28(a1)
+; NO-VECTOR-32-NEXT: lw a7, 24(a1)
+; NO-VECTOR-32-NEXT: lw t0, 20(a1)
+; NO-VECTOR-32-NEXT: lw t1, 16(a1)
+; NO-VECTOR-32-NEXT: lw t3, 12(a1)
+; NO-VECTOR-32-NEXT: lw t5, 8(a1)
+; NO-VECTOR-32-NEXT: lw t6, 4(a1)
+; NO-VECTOR-32-NEXT: lw a1, 0(a1)
+; NO-VECTOR-32-NEXT: lw t2, 28(a2)
+; NO-VECTOR-32-NEXT: lw t4, 24(a2)
+; NO-VECTOR-32-NEXT: lw a4, 20(a2)
+; NO-VECTOR-32-NEXT: lw a5, 16(a2)
+; NO-VECTOR-32-NEXT: lw a3, 0(a2)
+; NO-VECTOR-32-NEXT: lw s0, 4(a2)
+; NO-VECTOR-32-NEXT: lw s1, 8(a2)
+; NO-VECTOR-32-NEXT: lw a2, 12(a2)
+; NO-VECTOR-32-NEXT: add s2, a1, a3
+; NO-VECTOR-32-NEXT: add a3, t6, s0
+; NO-VECTOR-32-NEXT: add s1, t5, s1
+; NO-VECTOR-32-NEXT: add a2, t3, a2
+; NO-VECTOR-32-NEXT: add a5, t1, a5
+; NO-VECTOR-32-NEXT: add a4, t0, a4
+; NO-VECTOR-32-NEXT: add s0, a7, t4
+; NO-VECTOR-32-NEXT: add a1, a6, t2
+; NO-VECTOR-32-NEXT: sw a1, 28(a0)
+; NO-VECTOR-32-NEXT: sw s0, 24(a0)
+; NO-VECTOR-32-NEXT: sw a4, 20(a0)
+; NO-VECTOR-32-NEXT: sw a5, 16(a0)
+; NO-VECTOR-32-NEXT: sw a2, 12(a0)
+; NO-VECTOR-32-NEXT: sw s1, 8(a0)
+; NO-VECTOR-32-NEXT: sw a3, 4(a0)
+; NO-VECTOR-32-NEXT: sw s2, 0(a0)
+; NO-VECTOR-32-NEXT: lw s2, 4(sp)
+; NO-VECTOR-32-NEXT: lw s1, 8(sp)
+; NO-VECTOR-32-NEXT: lw s0, 12(sp)
+; NO-VECTOR-32-NEXT: addi sp, sp, 16
+; NO-VECTOR-32-NEXT: ret
+;
+; NO-VECTOR-64-LABEL: vadd_v8int32:
+; NO-VECTOR-64: # %bb.0:
+; NO-VECTOR-64-NEXT: addi sp, sp, -32
+; NO-VECTOR-64-NEXT: sd s0, 24(sp)
+; NO-VECTOR-64-NEXT: sd s1, 16(sp)
+; NO-VECTOR-64-NEXT: sd s2, 8(sp)
+; NO-VECTOR-64-NEXT: lw a6, 0(a1)
+; NO-VECTOR-64-NEXT: lw a7, 4(a1)
+; NO-VECTOR-64-NEXT: lw t0, 8(a1)
+; NO-VECTOR-64-NEXT: lw t1, 12(a1)
+; NO-VECTOR-64-NEXT: lw t3, 16(a1)
+; NO-VECTOR-64-NEXT: lw t5, 20(a1)
+; NO-VECTOR-64-NEXT: lw t6, 24(a1)
+; NO-VECTOR-64-NEXT: lw a1, 28(a1)
+; NO-VECTOR-64-NEXT: lw t2, 0(a2)
+; NO-VECTOR-64-NEXT: lw t4, 4(a2)
+; NO-VECTOR-64-NEXT: lw a4, 8(a2)
+; NO-VECTOR-64-NEXT: lw a5, 12(a2)
+; NO-VECTOR-64-NEXT: lw a3, 28(a2)
+; NO-VECTOR-64-NEXT: lw s0, 24(a2)
+; NO-VECTOR-64-NEXT: lw s1, 20(a2)
+; NO-VECTOR-64-NEXT: lw a2, 16(a2)
+; NO-VECTOR-64-NEXT: add s2, a1, a3
+; NO-VECTOR-64-NEXT: add a3, t6, s0
+; NO-VECTOR-64-NEXT: add s1, t5, s1
+; NO-VECTOR-64-NEXT: add a2, t3, a2
+; NO-VECTOR-64-NEXT: add a5, t1, a5
+; NO-VECTOR-64-NEXT: add a4, t0, a4
+; NO-VECTOR-64-NEXT: add s0, a7, t4
+; NO-VECTOR-64-NEXT: add a1, a6, t2
+; NO-VECTOR-64-NEXT: sw a1, 0(a0)
+; NO-VECTOR-64-NEXT: sw s0, 4(a0)
+; NO-VECTOR-64-NEXT: sw a4, 8(a0)
+; NO-VECTOR-64-NEXT: sw a5, 12(a0)
+; NO-VECTOR-64-NEXT: sw a2, 16(a0)
+; NO-VECTOR-64-NEXT: sw s1, 20(a0)
+; NO-VECTOR-64-NEXT: sw a3, 24(a0)
+; NO-VECTOR-64-NEXT: sw s2, 28(a0)
+; NO-VECTOR-64-NEXT: ld s2, 8(sp)
+; NO-VECTOR-64-NEXT: ld s1, 16(sp)
+; NO-VECTOR-64-NEXT: ld s0, 24(sp)
+; NO-VECTOR-64-NEXT: addi sp, sp, 32
+; NO-VECTOR-64-NEXT: ret
+;
+; VECTOR-LABEL: vadd_v8int32:
+; VECTOR: # %bb.0:
+; VECTOR-NEXT: addi a3, zero, 8
+; VECTOR-NEXT: vsetvli a4, a3, e32,m2,tu,mu
+; VECTOR-NEXT: vle32.v v26, (a1)
+; VECTOR-NEXT: vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT: vle32.v v28, (a2)
+; VECTOR-NEXT: vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT: vadd.vv v26, v26, v28
+; VECTOR-NEXT: vsetvli a1, a3, e32,m2,tu,mu
+; VECTOR-NEXT: vse32.v v26, (a0)
+; VECTOR-NEXT: ret
+  %va = load <8 x i32>, <8 x i32>* %pa
+  %vb = load <8 x i32>, <8 x i32>* %pb
+  %vc = add <8 x i32> %va, %vb
+  store <8 x i32> %vc, <8 x i32> *%pc
+  ret void
+}