diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -302,6 +302,19 @@
                              Instruction *I = nullptr) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
+
+  /// Return the number of interleaved accesses that lowering an access of
+  /// type \p VecTy will produce.
+  unsigned getNumInterleavedAccesses(VectorType *VecTy,
+                                     const DataLayout &DL) const;
+
+  /// Lower an interleaved load feeding \p Shuffles into a vlseg<Factor>
+  /// intrinsic call. Returns true if the shufflevectors were rewritten.
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            ArrayRef<ShuffleVectorInst *> Shuffles,
+                            ArrayRef<unsigned> Indices,
+                            unsigned Factor) const override;
+
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "RISCVTargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -948,6 +949,130 @@
   return isInt<12>(Imm);
 }
 
+/// Return the number of vlseg accesses needed to load an interleaved group
+/// whose de-interleaved result type is \p VecTy.
+///
+/// The current implementation always reports a single access; neither
+/// \p VecTy nor \p DL is inspected.
+unsigned
+RISCVTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+                                               const DataLayout &DL) const {
+  return 1;
+}
+
+/// Lower an interleaved load into calls to a vlseg<Factor> intrinsic.
+///
+/// \p LI is the wide load, \p Shuffles are the shufflevectors that
+/// de-interleave it, \p Indices[i] is the field of the segment that
+/// \p Shuffles[i] extracts, and \p Factor is the interleave factor. On
+/// success every shufflevector has its uses replaced by the matching field
+/// of the intrinsic result and true is returned; false is returned, with the
+/// IR untouched, when the access cannot be lowered here.
+bool RISCVTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  // One intrinsic per supported factor, indexed by Factor - 2.
+  static const Intrinsic::ID LoadInts[3] = {Intrinsic::riscv_vlseg2,
+                                            Intrinsic::riscv_vlseg3,
+                                            Intrinsic::riscv_vlseg4};
+
+  // The asserts above vanish in release builds and the hook's limit is not
+  // defined here, so make sure the table lookup below stays in bounds.
+  if (Factor < 2 || Factor - 2 >= sizeof(LoadInts) / sizeof(LoadInts[0]))
+    return false;
+
+  // Only fixed-length vectors are handled; bail out instead of letting a
+  // cast<> assert on a scalable type.
+  auto *FVTy = dyn_cast<FixedVectorType>(Shuffles[0]->getType());
+  if (!FVTy)
+    return false;
+
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+  unsigned NumLoads = getNumInterleavedAccesses(FVTy, DL);
+
+  // The intrinsic is instantiated on an integer vector when the elements are
+  // pointers; the loaded values are converted back with inttoptr below.
+  Type *EltTy = FVTy->getElementType();
+  if (EltTy->isPointerTy())
+    FVTy =
+        FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
+
+  IRBuilder<> Builder(LI);
+
+  // The base address of the load.
+  Value *BaseAddr = LI->getPointerOperand();
+
+  if (NumLoads > 1) {
+    // Each access then covers an equal share of the elements, and the address
+    // is stepped in units of the element type.
+    FVTy = FixedVectorType::get(FVTy->getElementType(),
+                                FVTy->getNumElements() / NumLoads);
+
+    BaseAddr = Builder.CreateBitCast(
+        BaseAddr,
+        FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+  }
+
+  // NOTE(review): the overload types and the single pointer operand follow
+  // the shape of this call as originally written; confirm them against the
+  // riscv_vlseg<N> definitions in IntrinsicsRISCV.td.
+  Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
+  Type *Tys[2] = {FVTy, PtrTy};
+  Function *LdNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+  // Holds sub-vectors extracted from the load intrinsic return values. The
+  // sub-vectors are associated with the shufflevector instructions they will
+  // replace.
+  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+
+  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
+
+    // If we're generating more than one load, compute the base address of
+    // subsequent loads as an offset from the previous.
+    if (LoadCount > 0)
+      BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+                                            FVTy->getNumElements() * Factor);
+
+    CallInst *LdN = Builder.CreateCall(
+        LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "vlsegN");
+
+    // Extract and store the sub-vectors returned by the load intrinsic.
+    for (unsigned i = 0; i < Shuffles.size(); i++) {
+      ShuffleVectorInst *SVI = Shuffles[i];
+      unsigned Index = Indices[i];
+
+      Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+
+      // Convert the integer vector to pointer vector if the element is pointer.
+      if (EltTy->isPointerTy())
+        SubVec = Builder.CreateIntToPtr(
+            SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
+                                         FVTy->getNumElements()));
+      SubVecs[SVI].push_back(SubVec);
+    }
+  }
+
+  // Replace uses of the shufflevector instructions with the sub-vectors
+  // returned by the load intrinsic. If a shufflevector instruction is
+  // associated with more than one sub-vector, those sub-vectors will be
+  // concatenated into a single wide vector.
+  for (ShuffleVectorInst *SVI : Shuffles) {
+    auto &SubVec = SubVecs[SVI];
+    auto *WideVec =
+        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+    SVI->replaceAllUsesWith(WideVec);
+  }
+
+  return true;
+}
+
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.