diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -11,6 +11,15 @@
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
+class GenForm<bits<3> Gen> {
+  bit VV = Gen{2};
+  bit VX = Gen{1};
+  bit VI = Gen{0};
+}
+def GenVV_VX_VI : GenForm<0b111>;
+def GenVV_VX    : GenForm<0b110>;
+def GenVX_VI    : GenForm<0b011>;
+
 // Atomics
 
 // Atomic Intrinsics have multiple versions for different access widths, which
@@ -65,4 +74,76 @@ // @llvm.riscv.masked.cmpxchg.{i32,i64}.<p>(...)
 defm int_riscv_masked_cmpxchg : MaskedAtomicRMWFiveArgIntrinsics;
+
+// optional vl
+multiclass RVV_VL<list<LLVMType> output, list<LLVMType> input,
+                  list<IntrinsicProperty> attr> {
+
+  defvar vl_input = !listconcat(input, [llvm_anyint_ty]);
+
+  def ""  : Intrinsic<output, input, attr>;
+  def _VL : Intrinsic<output, vl_input, attr>;
+}
+
+// optional mask and maskedoff
+multiclass RVV_M<list<LLVMType> output, list<LLVMType> input,
+                 list<IntrinsicProperty> attr, bit WithMO = true> {
+  defvar MaskInput = !listconcat(!if(WithMO,
+      [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+      [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]), input);
+
+  defm ""  : RVV_VL<output, input, attr>;
+  defm _M  : RVV_VL<output, MaskInput, attr>;
+}
+
+// Load unit
+multiclass RVV_LOAD_UNIT {
+  defvar output = [llvm_anyvector_ty];
+  defvar input = [llvm_anyptr_ty];
+  defvar attr = [IntrReadMem, IntrArgMemOnly];
+
+  defm "" : RVV_M<output, input, attr>;
+}
+
+// store unit
+multiclass RVV_STORE_UNIT {
+  defvar output = []<LLVMType>;
+  defvar input = [llvm_anyvector_ty, llvm_anyptr_ty];
+  defvar attr = [IntrWriteMem, IntrArgMemOnly];
+
+  defm "" : RVV_M<output, input, attr, /*WithMO=*/false>;
+}
+//
+// Load/Store
+// support EEW up to ELEN = 64
+foreach EEW = [8, 16, 32, 64] in {
+  defm int_riscv_VLE#EEW#_V : RVV_LOAD_UNIT;
+  defm int_riscv_VSE#EEW#_V : RVV_STORE_UNIT;
+}
+
+
+// with optional mask and maskedoff
+multiclass RVV_ALU_OM<list<LLVMType> output, list<list<LLVMType>> inputs,
+                      GenForm Gen> {
+  defvar attr = [IntrNoMem];
+  defvar attr_vi = [IntrNoMem, ImmArg<ArgIndex<1>>];
+
+  if Gen.VV then
+    defm _VV : RVV_M<output, inputs[0], attr>;
+  if Gen.VX then
+    defm _VX : RVV_M<output, inputs[1], attr>;
+  if Gen.VI then
+    defm _VI : RVV_M<output, inputs[2], attr_vi>;
+}
+
+
+defvar RVV_ALU_VV_V_X_I_OM_List = ["VADD"];
+foreach I = RVV_ALU_VV_V_X_I_OM_List in {
+  defvar output = [llvm_anyvector_ty];
+  defvar input_vv = [LLVMMatchType<0>, LLVMMatchType<0>];
+  defvar input_vx = [LLVMMatchType<0>, llvm_anyint_ty];
+  defvar input_vi = [LLVMMatchType<0>, llvm_anyint_ty];
+  defvar inputs = [input_vv, input_vx, input_vi];
+
+  defm int_riscv_#I : RVV_ALU_OM<output, inputs, GenVV_VX_VI>;
+}
+
 } // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -182,6 +182,54 @@
                VLMax, sew)>;
 }
 
+multiclass pat_intrinsic_binary<Intrinsic fromOp,
+                                Instruction toOp,
+                                ValueType result_type,
+                                ValueType op1_type,
+                                ValueType op2_type,
+                                ValueType mask_type,
+                                int sew,
+                                VReg op_reg_class,
+                                DAGOperand op2_kind> {
+
+  def : Pat<(result_type (fromOp (op1_type op_reg_class:$op1),
+                                 (op2_type op2_kind:$op2))),
+            (toOp (result_type (IMPLICIT_DEF)), op_reg_class:$op1,
+                  op2_kind:$op2,
+                  (mask_type zero_reg), VLMax, sew)>;
+}
+
+
+
+multiclass pat_intrinsic_binary_common<string op_name, GenForm Gen>
+{
+  foreach vti = AllIntegerVectors in {
+    defvar LMulSuffix = vti.LMul.MX;
+    defvar op_vv_name = op_name#_VV;
+    defvar op_vx_name = op_name#_VX;
+    defvar op_vi_name = op_name#_VI;
+
+    if Gen.VV then {
+      defvar intrinsic_vv = !cast<Intrinsic>("int_riscv_"#op_vv_name);
+      defvar pseudo_vv = !cast<Instruction>("Pseudo"#op_vv_name#_#LMulSuffix);
+      defm : pat_intrinsic_binary<intrinsic_vv, pseudo_vv,
+                                  vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+                                  vti.SEW, vti.RegClass, vti.RegClass>;
+    }
+    if Gen.VX then {
+      defvar intrinsic_vx = !cast<Intrinsic>("int_riscv_"#op_vx_name);
+      defvar pseudo_vx = !cast<Instruction>("Pseudo"#op_vx_name#_#LMulSuffix);
+      defm : pat_intrinsic_binary<intrinsic_vx, pseudo_vx,
+                                  vti.Vector, vti.Vector, XLenVT, vti.Mask,
+                                  vti.SEW, vti.RegClass, GPR>;
+    }
+    if Gen.VI then {
+      defvar intrinsic_vi = !cast<Intrinsic>("int_riscv_"#op_vi_name);
+      defvar pseudo_vi = !cast<Instruction>("Pseudo"#op_vi_name#_#LMulSuffix);
+      defm : pat_intrinsic_binary<intrinsic_vi, pseudo_vi,
+                                  vti.Vector, vti.Vector, XLenVT, vti.Mask,
+                                  vti.SEW, vti.RegClass, simm5>;
+    }
+  }
+}
+
 multiclass pat_vop_binary_common<SDNode vop, string instruction_name,
                                  list<VTypeInfo> vtilist>
@@ -253,6 +301,30 @@
   }
 }
 
+multiclass pat_intrinsic_load<string op_name>
+{
+  foreach vti = AllVectors in {
+    defvar intrinsic = !cast<Intrinsic>("int_riscv_"#op_name # vti.SEW # "_V");
+    defvar instruction = !cast<Instruction>("Pseudo"#op_name # vti.SEW # "_V_"#
+                                            vti.LMul.MX);
+    def : Pat<(vti.Vector (intrinsic GPR:$rs1)),
+              (instruction (vti.Vector (IMPLICIT_DEF)), GPR:$rs1,
+               (vti.Mask zero_reg), VLMax, vti.SEW)>;
+  }
+}
+
+multiclass pat_intrinsic_store<string op_name>
+{
+  foreach vti = AllVectors in {
+    defvar intrinsic = !cast<Intrinsic>("int_riscv_"#op_name # vti.SEW # "_V");
+    defvar instruction = !cast<Instruction>("Pseudo"#op_name # vti.SEW # "_V_"#
+                                            vti.LMul.MX);
+    def : Pat<(intrinsic vti.Vector:$rs2, GPR:$rs1),
+              (instruction vti.RegClass:$rs2, GPR:$rs1, (vti.Mask zero_reg),
+               VLMax, vti.SEW)>;
+  }
+
+}
 // Patterns.
 multiclass pat_load_store
 }
+defm "" : pat_intrinsic_load<"VLE">;
+defm "" : pat_intrinsic_store<"VSE">;
 //===----------------------------------------------------------------------===//
 // 12. Vector Integer Arithmetic Instructions
 //===----------------------------------------------------------------------===//
@@ -305,4 +379,10 @@
 // Whole-register vector patterns.
 defm "" : pat_vop_binary_common<add, "PseudoVADD", AllIntegerVectors>;
+
+// intrinsic pattern
+defvar VALU_V_X_I_List = ["VADD"];
+foreach I = VALU_V_X_I_List in
+  defm "" : pat_intrinsic_binary_common<I, GenVV_VX_VI>;
+
 } // Predicates = [HasStdExtV]
diff --git a/llvm/test/CodeGen/RISCV/rvv/intrinsic-load-add-store-32.ll b/llvm/test/CodeGen/RISCV/rvv/intrinsic-load-add-store-32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/intrinsic-load-add-store-32.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN:     -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN:     -verify-machineinstrs | FileCheck %s
+
+define void @vadd_vint32m1(<vscale x 2 x i32> *%pc, <vscale x 2 x i32> *%pa, <vscale x 2 x i32> *%pb) nounwind {
+; CHECK-LABEL: vadd_vint32m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a3, zero, e32,m1,tu,mu
+; CHECK-NEXT:    vle32.v v25, (a1)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,tu,mu
+; CHECK-NEXT:    vle32.v v26, (a2)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,tu,mu
+; CHECK-NEXT:    vadd.vv v25, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,tu,mu
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %va = tail call <vscale x 2 x i32> @llvm.riscv.VLE32.V.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %pa)
+  %vb = tail call <vscale x 2 x i32> @llvm.riscv.VLE32.V.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* %pb)
+  %vc = tail call <vscale x 2 x i32> @llvm.riscv.VADD.VV.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb)
+  tail call void @llvm.riscv.VSE32.V.nxv2i32.p0nxv2i32(<vscale x 2 x i32> %vc, <vscale x 2 x i32>* %pc)
+  ret void
+}
+
+define void @vadd_vint32m2(<vscale x 4 x i32> *%pc, i32 *%pa, i32 *%pb) nounwind {
+; CHECK-LABEL: vadd_vint32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a3, zero, e32,m2,tu,mu
+; CHECK-NEXT:    vle32.v v26, (a1)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,tu,mu
+; CHECK-NEXT:    vle32.v v28, (a2)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,tu,mu
+; CHECK-NEXT:    vadd.vv v26, v26, v28
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,tu,mu
+; CHECK-NEXT:    vse32.v v26, (a0)
+; CHECK-NEXT:    ret
+  %va = tail call <vscale x 4 x i32> @llvm.riscv.VLE32.V.nxv4i32.p0i32(i32* %pa)
+  %vb = tail call <vscale x 4 x i32> @llvm.riscv.VLE32.V.nxv4i32.p0i32(i32* %pb)
+  %vc = tail call <vscale x 4 x i32> @llvm.riscv.VADD.VV.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb)
+  tail call void @llvm.riscv.VSE32.V.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %vc, <vscale x 4 x i32>* %pc)
+  ret void
+}
+
+define void @vadd_vint32m4(<vscale x 8 x i32> *%pc, i32 *%pa, i32 *%pb) nounwind {
+; CHECK-LABEL: vadd_vint32m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a3, zero, e32,m4,tu,mu
+; CHECK-NEXT:    vle32.v v28, (a1)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; CHECK-NEXT:    vle32.v v8, (a2)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; CHECK-NEXT:    vadd.vv v28, v28, v8
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,tu,mu
+; CHECK-NEXT:    vse32.v v28, (a0)
+; CHECK-NEXT:    ret
+  %va = tail call <vscale x 8 x i32> @llvm.riscv.VLE32.V.nxv8i32.p0i32(i32* %pa)
+  %vb = tail call <vscale x 8 x i32> @llvm.riscv.VLE32.V.nxv8i32.p0i32(i32* %pb)
+  %vc = tail call <vscale x 8 x i32> @llvm.riscv.VADD.VV.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb)
+  tail call void @llvm.riscv.VSE32.V.nxv8i32.p0nxv8i32(<vscale x 8 x i32> %vc, <vscale x 8 x i32>* %pc)
+  ret void
+}
+
+define void @vadd_vint32m8(<vscale x 16 x i32> *%pc, i32 *%pa, i32 *%pb) nounwind {
+; CHECK-LABEL: vadd_vint32m8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a3, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vle32.v v16, (a2)
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,tu,mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %va = tail call <vscale x 16 x i32> @llvm.riscv.VLE32.V.nxv16i32.p0i32(i32* %pa)
+  %vb = tail call <vscale x 16 x i32> @llvm.riscv.VLE32.V.nxv16i32.p0i32(i32* %pb)
+  %vc = tail call <vscale x 16 x i32> @llvm.riscv.VADD.VV.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb)
+  tail call void @llvm.riscv.VSE32.V.nxv16i32.p0nxv16i32(<vscale x 16 x i32> %vc, <vscale x 16 x i32>* %pc)
+  ret void
+}
+
+define void @vadd_vint32mf2(<vscale x 1 x i32> *%pc, i32 *%pa, i32 *%pb) nounwind {
+; CHECK-LABEL: vadd_vint32mf2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a3, zero, e32,mf2,tu,mu
+; CHECK-NEXT:    vle32.v v25, (a1)
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,tu,mu
+; CHECK-NEXT:    vle32.v v26, (a2)
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,tu,mu
+; CHECK-NEXT:    vadd.vv v25, v25, v26
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,tu,mu
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %va = tail call <vscale x 1 x i32> @llvm.riscv.VLE32.V.nxv1i32.p0i32(i32* %pa)
+  %vb = tail call <vscale x 1 x i32> @llvm.riscv.VLE32.V.nxv1i32.p0i32(i32* %pb)
+  %vc = tail call <vscale x 1 x i32> @llvm.riscv.VADD.VV.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb)
+  tail call void @llvm.riscv.VSE32.V.nxv1i32.p0nxv1i32(<vscale x 1 x i32> %vc, <vscale x 1 x i32>* %pc)
+  ret void
+}
+declare <vscale x 2 x i32> @llvm.riscv.VLE32.V.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*)
+declare <vscale x 4 x i32> @llvm.riscv.VLE32.V.nxv4i32.p0i32(i32*)
+declare <vscale x 8 x i32> @llvm.riscv.VLE32.V.nxv8i32.p0i32(i32*)
+declare <vscale x 16 x i32> @llvm.riscv.VLE32.V.nxv16i32.p0i32(i32*)
+declare <vscale x 1 x i32> @llvm.riscv.VLE32.V.nxv1i32.p0i32(i32*)
+
+declare <vscale x 2 x i32> @llvm.riscv.VADD.VV.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.riscv.VADD.VV.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.riscv.VADD.VV.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+declare <vscale x 16 x i32> @llvm.riscv.VADD.VV.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+declare <vscale x 1 x i32> @llvm.riscv.VADD.VV.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+
+declare void @llvm.riscv.VSE32.V.nxv2i32.p0nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*)
+declare void @llvm.riscv.VSE32.V.nxv4i32.p0nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*)
+declare void @llvm.riscv.VSE32.V.nxv8i32.p0nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>*)
+declare void @llvm.riscv.VSE32.V.nxv16i32.p0nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>*)
+declare void @llvm.riscv.VSE32.V.nxv1i32.p0nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>*)
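
Note (not part of the diff): the RVV_VL/RVV_M multiclasses above should emit four intrinsic variants per operation: a plain form, a _VL form with a trailing vl operand, and _M / _M_VL forms that prepend a mask and maskedoff operand. The test only exercises the plain forms. As a rough sketch of what the other variants of VADD.VV would look like at SEW=32, LMUL=1, assuming the usual underscore-to-dot name substitution and overload-suffix mangling (the .VL/.M suffixes and the i64 vl type here are illustrative assumptions, not verified output):

  declare <vscale x 2 x i32> @llvm.riscv.VADD.VV.VL.nxv2i32.i64(
      <vscale x 2 x i32>, <vscale x 2 x i32>, i64)                ; ops, vl
  declare <vscale x 2 x i32> @llvm.riscv.VADD.VV.M.nxv2i32(
      <vscale x 2 x i1>, <vscale x 2 x i32>,                      ; mask, maskedoff
      <vscale x 2 x i32>, <vscale x 2 x i32>)                     ; ops
  declare <vscale x 2 x i32> @llvm.riscv.VADD.VV.M.VL.nxv2i32.i64(
      <vscale x 2 x i1>, <vscale x 2 x i32>,                      ; mask, maskedoff
      <vscale x 2 x i32>, <vscale x 2 x i32>, i64)                ; ops, vl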