diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -339,6 +339,15 @@
                              Instruction *I = nullptr) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
+
+  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+  bool isLegalInterleavedAccessType(VectorType *VecTy,
+                                    const DataLayout &DL) const;
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            ArrayRef<ShuffleVectorInst *> Shuffles,
+                            ArrayRef<unsigned> Indices,
+                            unsigned Factor) const override;
+
+  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1183,6 +1184,135 @@
   return isInt<12>(Imm);
 }
 
+/// Return true if \p VecTy is usable as the per-field type of an interleaved
+/// access: at least two elements, 8/16/32/64-bit elements, and for half, float
+/// or double elements the Zfh, F or D extension respectively.
+bool RISCVTargetLowering::isLegalInterleavedAccessType(
+    VectorType *VecTy, const DataLayout &DL) const {
+
+  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+  unsigned NumElements =
+      isa<ScalableVectorType>(VecTy)
+          ? cast<ScalableVectorType>(VecTy)->getMinNumElements()
+          : cast<FixedVectorType>(VecTy)->getNumElements();
+  // Ensure the number of vector elements is greater than 1.
+  if (NumElements < 2)
+    return false;
+
+  // Ensure the element type is legal.
+  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
+    return false;
+
+  if (VecTy->getElementType()->isHalfTy())
+    return Subtarget.hasStdExtZfh();
+  if (VecTy->getElementType()->isFloatTy())
+    return Subtarget.hasStdExtF();
+  if (VecTy->getElementType()->isDoubleTy())
+    return Subtarget.hasStdExtD();
+
+  return true;
+}
+
+/// Lower an interleaved load into one llvm.riscv.seg<Factor>.load call.
+///
+/// \p Shuffles are the de-interleaving shufflevectors and \p Indices[I] is the
+/// field selected by \p Shuffles[I]. Each shuffle has its uses replaced by the
+/// matching extractvalue of the intrinsic result; nothing is erased here.
+/// Returns false before creating any IR when the V extension is missing, the
+/// element type is unsupported, or the fields do not fit the register group
+/// limit of a segment access.
+bool RISCVTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+
+  VectorType *VTy = Shuffles[0]->getType();
+
+  // Skip if we do not have StdExtV and skip illegal vector types.
+  if (!Subtarget.hasStdExtV() || !isLegalInterleavedAccessType(VTy, DL))
+    return false;
+
+  auto *FVTy = cast<FixedVectorType>(VTy);
+  // A segment access requires EMUL * NFIELDS <= 8, so each field may occupy at
+  // most (8 / Factor) times the minimum RVV vector size.
+  // FIXME: Support large vectors.
+  if (DL.getTypeSizeInBits(FVTy->getElementType()) * FVTy->getNumElements() >
+      Subtarget.getMinRVVVectorSizeInBits() * (8 / Factor))
+    return false;
+
+  // A pointer vector can not be the return type of the vlsegN intrinsics. Need
+  // to load integer vectors first and then convert to pointer vectors.
+  Type *EltTy = FVTy->getElementType();
+  if (EltTy->isPointerTy())
+    FVTy =
+        FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
+
+  IRBuilder<> Builder(LI);
+
+  // The base address of the load.
+  Value *BaseAddr = LI->getPointerOperand();
+
+  Type *PtrTy =
+      FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
+  Type *IntTy;
+  Triple T = getTargetMachine().getTargetTriple();
+  if (T.isArch64Bit())
+    IntTy = IntegerType::get(FVTy->getContext(), 64);
+  else {
+    assert(T.isArch32Bit() && "only RV32 and RV64 are currently supported");
+    IntTy = IntegerType::get(FVTy->getContext(), 32);
+  }
+
+  Type *Tys[] = {FVTy, PtrTy, IntTy};
+  static const Intrinsic::ID VlsegInts[3] = {Intrinsic::riscv_seg2_load,
+                                             Intrinsic::riscv_seg3_load,
+                                             Intrinsic::riscv_seg4_load};
+  Function *VlsegNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), VlsegInts[Factor - 2], Tys);
+
+  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+
+  Value *VL =
+      ConstantInt::get(IntTy, cast<FixedVectorType>(VTy)->getNumElements());
+  CallInst *VlsegN = Builder.CreateCall(
+      VlsegNFunc, {Builder.CreateBitCast(BaseAddr, PtrTy), VL}, "vlsegN");
+
+  // Extract and store the sub-vectors returned by the load intrinsic.
+  for (unsigned I = 0; I < Shuffles.size(); I++) {
+    ShuffleVectorInst *SVI = Shuffles[I];
+    unsigned Index = Indices[I];
+
+    Value *SubVec = Builder.CreateExtractValue(VlsegN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(
+          SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
+                                       FVTy->getNumElements()));
+    SubVecs[SVI].push_back(SubVec);
+  }
+
+  // Replace uses of the shufflevector instructions with the sub-vectors
+  // returned by the load intrinsic. If a shufflevector instruction is
+  // associated with more than one sub-vector, those sub-vectors will be
+  // concatenated into a single wide vector.
+  for (ShuffleVectorInst *SVI : Shuffles) {
+    auto &SubVec = SubVecs[SVI];
+    auto *WideVec =
+        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+    SVI->replaceAllUsesWith(WideVec);
+  }
+
+  return true;
+}
+
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -162,6 +162,7 @@
     addPass(createRISCVGatherScatterLoweringPass());
 
   TargetPassConfig::addIRPasses();
+  addPass(createInterleavedAccessPass());
 }
 
 bool RISCVPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -31,6 +31,7 @@
 ; CHECK-NEXT:       Expand vector predication intrinsics
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
+; CHECK-NEXT:       Interleaved Access Pass
 ; CHECK-NEXT:       Exception handling preparation
 ; CHECK-NEXT:       Safe Stack instrumentation pass
 ; CHECK-NEXT:       Insert stack protectors
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -57,6 +57,7 @@
 ; CHECK-NEXT:       Expand reduction intrinsics
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       TLS Variable Hoist
+; CHECK-NEXT:       Interleaved Access Pass
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple riscv64-linux-gnu -interleaved-access -mattr=+v -riscv-v-vector-bits-min=128 \
+; RUN:    -S < %s | FileCheck %s
+
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+; CHECK-LABEL: @load_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* [[TMP1]], i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <8 x i8> [[TMP2]]
+;
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %v1
+}
+
+define <4 x i32> @load_factor3(<12 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor3(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.v4i32.p0i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  ret <4 x i32> %v2
+}
+
+define <4 x i32> @load_factor4(<16 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor4(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.v4i32.p0i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret <4 x i32> %v3
+}
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+    config.unsupported = True