Index: llvm/include/llvm/IR/IntrinsicsRISCV.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsRISCV.td +++ llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1463,6 +1463,16 @@ [llvm_anyvector_ty, llvm_anyptr_ty, llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [NoCapture>, IntrWriteMem]>; + + // Segment loads for fixed vectors. + foreach nf = [2, 3, 4, 5, 6, 7, 8] in { + def int_riscv_seg # nf # _load + : Intrinsic, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty], + [NoCapture>, IntrReadMem]>; + } + } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1152,6 +1152,24 @@ Info.size = MemoryLocation::UnknownSize; Info.flags |= MachineMemOperand::MOStore; return true; + case Intrinsic::riscv_seg2_load: + case Intrinsic::riscv_seg3_load: + case Intrinsic::riscv_seg4_load: + case Intrinsic::riscv_seg5_load: + case Intrinsic::riscv_seg6_load: + case Intrinsic::riscv_seg7_load: + case Intrinsic::riscv_seg8_load: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(0); + Info.memVT = + getValueType(DL, I.getType()->getStructElementType(0)->getScalarType()); + Info.align = + Align(DL.getTypeSizeInBits( + I.getType()->getStructElementType(0)->getScalarType()) / + 8); + Info.size = MemoryLocation::UnknownSize; + Info.flags |= MachineMemOperand::MOLoad; + return true; } } @@ -4803,6 +4821,49 @@ Result = convertFromScalableVector(VT, Result, DAG, Subtarget); return DAG.getMergeValues({Result, Chain}, DL); } + case Intrinsic::riscv_seg2_load: + case Intrinsic::riscv_seg3_load: + case Intrinsic::riscv_seg4_load: + case Intrinsic::riscv_seg5_load: + case Intrinsic::riscv_seg6_load: + case Intrinsic::riscv_seg7_load: + case 
Intrinsic::riscv_seg8_load: { + SDLoc DL(Op); + static const Intrinsic::ID VlsegInts[7] = { + Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, + Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, + Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, + Intrinsic::riscv_vlseg8}; + unsigned NF = Op->getNumValues() - 1; + assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op->getSimpleValueType(0); + MVT ContainerVT = getContainerForFixedLengthVector(VT); + + SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); + SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); + auto *Load = cast(Op); + SmallVector Ops{Load->getChain(), IntID}; + Ops.push_back(Op.getOperand(2)); // Ptr + Ops.push_back(VL); + + SmallVector ContainerVTs; + for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) { + ContainerVTs.push_back(ContainerVT); + } + ContainerVTs.push_back(MVT::Other); + SDVTList VTs = DAG.getVTList(ContainerVTs); + SDValue Result = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, + Load->getMemoryVT(), Load->getMemOperand()); + SmallVector Results; + for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) { + Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), + DAG, Subtarget)); + } + Results.push_back(Result.getValue(NF)); + return DAG.getMergeValues(Results, DL); + } } return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \ +; RUN: | FileCheck %s + +define <8 x i8> @load_factor2(<16 x i8>* %ptr) { +; CHECK-LABEL: load_factor2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, 
mu +; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <16 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8> } %2, 1 + ret <8 x i8> %4 +} + +define <8 x i8> @load_factor3(<24 x i8>* %ptr) { +; CHECK-LABEL: load_factor3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <24 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + ret <8 x i8> %5 +} + +define <8 x i8> @load_factor4(<32 x i8>* %ptr) { +; CHECK-LABEL: load_factor4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg4e8.v v5, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <32 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3 + ret <8 x i8> %6 +} + +define <8 x i8> @load_factor5(<40 x i8>* %ptr) { +; CHECK-LABEL: load_factor5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg5e8.v v4, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <40 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, 
<8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3 + %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4 + ret <8 x i8> %7 +} + +define <8 x i8> @load_factor6(<48 x i8>* %ptr) { +; CHECK-LABEL: load_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg6e8.v v3, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <48 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3 + %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4 + %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5 + ret <8 x i8> %8 +} + +define <8 x i8> @load_factor7(<56 x i8>* %ptr) { +; CHECK-LABEL: load_factor7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg7e8.v v2, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <56 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, 
<8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3 + %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4 + %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5 + %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6 + ret <8 x i8> %9 +} + +define <8 x i8> @load_factor8(<64 x i8>* %ptr) { +; CHECK-LABEL: load_factor8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vlseg8e8.v v1, (a0) +; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8 +; CHECK-NEXT: ret + %1 = bitcast <64 x i8>* %ptr to i8* + %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8) + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2 + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3 + %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4 + %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5 + %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6 + %10 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 7 + ret <8 x 
i8> %10 +} +declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8*, i64) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8*, i64)