Index: llvm/include/llvm/IR/IntrinsicsRISCV.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1516,6 +1516,16 @@
                     [NoCapture<ArgIndex<0>>, IntrReadMem]>;
   }
 
+  // Segment stores for fixed vectors.
+  foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
+    def int_riscv_seg # nf # _store
+        : Intrinsic<[],
+                    !listconcat([llvm_anyptr_ty], [llvm_anyvector_ty],
+                                !listsplat(LLVMMatchType<1>, !add(nf, -1)),
+                                [llvm_anyint_ty]),
+                    [NoCapture<ArgIndex<0>>, IntrWriteMem]>;
+  }
+
 } // TargetPrefix = "riscv"
 
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1142,6 +1142,23 @@
     Info.size = MemoryLocation::UnknownSize;
     Info.flags |= MachineMemOperand::MOLoad;
     return true;
+  case Intrinsic::riscv_seg2_store:
+  case Intrinsic::riscv_seg3_store:
+  case Intrinsic::riscv_seg4_store:
+  case Intrinsic::riscv_seg5_store:
+  case Intrinsic::riscv_seg6_store:
+  case Intrinsic::riscv_seg7_store:
+  case Intrinsic::riscv_seg8_store:
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.memVT =
+        getValueType(DL, I.getArgOperand(1)->getType()->getScalarType());
+    Info.align = Align(
+        DL.getTypeSizeInBits(I.getArgOperand(1)->getType()->getScalarType()) /
+        8);
+    Info.size = MemoryLocation::UnknownSize;
+    Info.flags |= MachineMemOperand::MOStore;
+    return true;
   }
 }
 
@@ -5037,6 +5054,44 @@
                                    Ops, Store->getMemoryVT(),
                                    Store->getMemOperand());
   }
+  case Intrinsic::riscv_seg2_store:
+  case Intrinsic::riscv_seg3_store:
+  case Intrinsic::riscv_seg4_store:
+  case Intrinsic::riscv_seg5_store:
+  case Intrinsic::riscv_seg6_store:
+  case Intrinsic::riscv_seg7_store:
+  case Intrinsic::riscv_seg8_store: {
+    SDLoc DL(Op);
+    static const Intrinsic::ID VssegInts[7] = {
+        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
+        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
+        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
+        Intrinsic::riscv_vsseg8};
+    // Operands are (chain, int_id, ptr, vec_0, ..., vec_{nf-1}, vl).
+    unsigned NF = Op->getNumOperands() - 4;
+    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+    MVT XLenVT = Subtarget.getXLenVT();
+    SDValue Val = Op.getOperand(3);
+    MVT VT = Val.getSimpleValueType();
+    MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+    SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
+
+    auto *Store = cast<MemIntrinsicSDNode>(Op);
+    SmallVector<MVT, 9> ContainerVTs(NF, ContainerVT);
+    ContainerVTs.push_back(MVT::Other);
+    SmallVector<SDValue, 12> Ops{Store->getChain(), IntID};
+    for (unsigned OpsIdx = 0; OpsIdx < NF; OpsIdx++)
+      Ops.push_back(convertToScalableVector(
+          ContainerVTs[OpsIdx], Op.getOperand(OpsIdx + 3), DAG, Subtarget));
+    Ops.push_back(Op.getOperand(2));
+    Ops.push_back(VL);
+
+    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
+                                   Ops, Store->getMemoryVT(),
+                                   Store->getMemOperand());
+  }
   }
 
   return SDValue();
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-store.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-store.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \
+; RUN: | FileCheck %s
+
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+; CHECK-LABEL: store_factor2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9 def $v8_v9
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9 def $v8_v9
+; CHECK-NEXT:    vsseg2e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <16 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg2.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, i64 8)
+  ret void
+}
+define void @store_factor3(<24 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2) {
+; CHECK-LABEL: store_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10 def $v8_v9_v10
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10 def $v8_v9_v10
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10 def $v8_v9_v10
+; CHECK-NEXT:    vsseg3e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <24 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg3.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, i64 8)
+  ret void
+}
+define void @store_factor4(<32 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
+; CHECK-LABEL: store_factor4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v11 killed $v11 killed $v8_v9_v10_v11 def $v8_v9_v10_v11
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10_v11 def $v8_v9_v10_v11
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10_v11 def $v8_v9_v10_v11
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10_v11 def $v8_v9_v10_v11
+; CHECK-NEXT:    vsseg4e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <32 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg4.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, i64 8)
+  ret void
+}
+define void @store_factor5(<40 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4) {
+; CHECK-LABEL: store_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v12 killed $v12 killed $v8_v9_v10_v11_v12 def $v8_v9_v10_v11_v12
+; CHECK-NEXT:    # kill: def $v11 killed $v11 killed $v8_v9_v10_v11_v12 def $v8_v9_v10_v11_v12
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10_v11_v12 def $v8_v9_v10_v11_v12
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10_v11_v12 def $v8_v9_v10_v11_v12
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10_v11_v12 def $v8_v9_v10_v11_v12
+; CHECK-NEXT:    vsseg5e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <40 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg5.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, i64 8)
+  ret void
+}
+define void @store_factor6(<48 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5) {
+; CHECK-LABEL: store_factor6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v13 killed $v13 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    # kill: def $v12 killed $v12 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    # kill: def $v11 killed $v11 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10_v11_v12_v13 def $v8_v9_v10_v11_v12_v13
+; CHECK-NEXT:    vsseg6e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <48 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg6.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, i64 8)
+  ret void
+}
+define void @store_factor7(<56 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6) {
+; CHECK-LABEL: store_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v14 killed $v14 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    # kill: def $v13 killed $v13 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    # kill: def $v12 killed $v12 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    # kill: def $v11 killed $v11 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10_v11_v12_v13_v14 def $v8_v9_v10_v11_v12_v13_v14
+; CHECK-NEXT:    vsseg7e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <56 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg7.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, i64 8)
+  ret void
+}
+define void @store_factor8(<64 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7) {
+; CHECK-LABEL: store_factor8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v15 killed $v15 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v14 killed $v14 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v13 killed $v13 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v12 killed $v12 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v11 killed $v11 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v10 killed $v10 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    # kill: def $v9 killed $v9 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8_v9_v10_v11_v12_v13_v14_v15 def $v8_v9_v10_v11_v12_v13_v14_v15
+; CHECK-NEXT:    vsseg8e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %1 = bitcast <64 x i8>* %ptr to i8*
+  call void @llvm.riscv.seg8.store.p0i8.v8i8.i64(i8* %1, <8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, i64 8)
+  ret void
+}
+declare void @llvm.riscv.seg2.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg3.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg4.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg5.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg6.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg7.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64)
+declare void @llvm.riscv.seg8.store.p0i8.v8i8.i64(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64)
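
Note (not part of the patch): the new intrinsics are overloaded on the pointer type, the per-field vector type, and the VL integer type, which is why the test declarations are mangled as @llvm.riscv.seg2.store.p0i8.v8i8.i64 and so on. A minimal C++ sketch of how a client pass could materialize such a call with IRBuilder is shown below; the helper name emitSeg2Store and its parameters are hypothetical and only illustrate the overload order used by the .td definition above.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: store the two fixed vectors V0 and V1 as one
// two-field segment at Ptr, with VL giving the active element count.
static void emitSeg2Store(IRBuilder<> &Builder, Module &M, Value *Ptr,
                          Value *V0, Value *V1, Value *VL) {
  // Overload order follows the intrinsic definition: pointer type,
  // segment vector type, then the VL (XLEN-sized) integer type.
  Function *Seg2 = Intrinsic::getDeclaration(
      &M, Intrinsic::riscv_seg2_store,
      {Ptr->getType(), V0->getType(), VL->getType()});
  Builder.CreateCall(Seg2, {Ptr, V0, V1, VL});
}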