diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -340,6 +340,13 @@ Value *NewVal, Value *Mask, AtomicOrdering Ord) const override; + /// Returns true if the target allows unaligned memory accesses of the + /// specified type. + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -394,6 +401,11 @@ bool useRVVForFixedLengthVectorVT(MVT VT) const; }; +namespace RISCV { +// We use 64 bits as the known part in the scalable vector types. +static constexpr unsigned RVVBitsPerBlock = 64; +} // namespace RISCV + namespace RISCVVIntrinsicsTable { struct RISCVVIntrinsicInfo { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5102,6 +5102,22 @@ return true; } +bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (!VT.isScalableVector()) + return false; + + EVT ElemVT = VT.getVectorElementType(); + if (Alignment >= ElemVT.getStoreSize()) { + if (Fast) + *Fast = true; + return true; + } + + return false; +} + #define GET_REGISTER_MATCHER #include "RISCVGenAsmMatcher.inc" diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -134,6 +134,7 @@ assert(i < RISCV::NUM_TARGET_REGS && "Register out of range"); return UserReservedRegister[i]; } + unsigned getMaxVectorSizeInBits() const; protected: // 
GlobalISel related APIs. diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -39,6 +39,11 @@ "Fractional LMUL values are not supported."), cl::init(8), cl::Hidden); +static cl::opt<unsigned> VectorBitsMax( + "riscv-vector-bits-max", + cl::desc("Assume RISC-V vector registers are at most this big"), + cl::init(0), cl::Hidden); + void RISCVSubtarget::anchor() {} RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies( @@ -62,6 +67,11 @@ return *this; } +unsigned RISCVSubtarget::getMaxVectorSizeInBits() const { + assert(HasStdExtV && "Tried to get vector length without V support!"); + return VectorBitsMax; +} + RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName, const TargetMachine &TM) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -49,6 +49,8 @@ Type *Ty, TTI::TargetCostKind CostKind); bool shouldExpandReduction(const IntrinsicInst *II) const; + bool supportsScalableVectors() const { return ST->hasStdExtV(); } + Optional<unsigned> getMaxVScale() const; }; } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -120,3 +120,17 @@ return true; } } + +Optional<unsigned> RISCVTTIImpl::getMaxVScale() const { + // There is no assumption of the maximum vector length in V specification. + // We use the value specified by users as the maximum vector length. + // This function will use the assumed maximum vector length to get the + // maximum vscale for LoopVectorizer. 
+ // If users do not specify the maximum vector length, we have no way to + // know whether the LoopVectorizer is safe to do or not. + // We only consider to use single vector register (LMUL = 1) to vectorize. + unsigned MaxVectorSizeInBits = ST->getMaxVectorSizeInBits(); + if (ST->hasStdExtV() && MaxVectorSizeInBits != 0) + return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock; + return BaseT::getMaxVScale(); +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg @@ -0,0 +1,4 @@ +config.suffixes = ['.ll'] + +if not 'RISCV' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll @@ -0,0 +1,37 @@ +; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \ +; RUN: -riscv-vector-bits-max=512 -S < %s 2>&1 \ +; RUN: | FileCheck %s + +; void test(int *a, int *b, int N) { +; #pragma clang loop vectorize(enable) vectorize_width(2, scalable) +; for (int i=0; i<N; ++i) +; a[i + 64] = a[i] + b[i]; +; } + +; CHECK: <vscale x 2 x i32> +define void @test(i32* %a, i32* %b) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %2 = add nuw nsw i64 %iv, 64 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6 + +exit: + ret void +} + +!6 = !{!6, !7, !8} +!7 = !{!"llvm.loop.vectorize.width", i32 2} 
+!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}