diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -126,6 +126,16 @@
   bool enableSaveRestore() const { return EnableSaveRestore; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
+  /// Returns FLEN, the floating-point register width in bits implied by the
+  /// enabled extensions: 64 with D, 32 with only F, and 0 when neither is
+  /// enabled.
+  unsigned getFLen() const {
+    if (HasStdExtD)
+      return 64;
+    if (HasStdExtF)
+      return 32;
+    return 0;
+  }
   RISCVABI::ABI getTargetABI() const { return TargetABI; }
   bool isRegisterReservedByUser(Register i) const {
     assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -84,7 +84,9 @@
   }
 
   TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
-  RISCVFeatures::validate(TT, getFeatureBits());
+  const FeatureBitset &Features = getFeatureBits();
+  RISCVFeatures::validate(TT, Features);
+
   return *this;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -60,6 +60,9 @@
 
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
 
+  /// Estimates how many registers a value of type \p Ty occupies.
+  InstructionCost getRegUsageForType(Type *Ty) const;
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP,
                                OptimizationRemarkEmitter *ORE);
@@ -176,6 +179,25 @@
     // Let regular unroll to unroll the loop.
     return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
   }
+
+  /// Returns the number of registers available in register class \p ClassID.
+  ///
+  /// Class 1 is treated as the vector class and reports 32 registers when
+  /// vector instructions are available, 0 otherwise; every other class is
+  /// treated as scalar and reports 31 registers.
+  // TODO: We should define RISC-V's own register classes,
+  //       e.g. a register class for FPR.
+  unsigned getNumberOfRegisters(unsigned ClassID) const {
+    // NOTE(review): 1 is presumably the generic vector class ID (the
+    // vectorizer debug output names it Generic::VectorRC) -- confirm.
+    bool Vector = (ClassID == 1);
+    if (Vector)
+      return ST->hasVInstructions() ? 32 : 0;
+
+    // 31 = 32 GPR - x0 (zero register)
+    // FIXME: Should we exclude fixed registers like SP, TP or GP?
+    return 31;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -278,3 +278,30 @@
                                          TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
 }
+
+/// Estimates the number of registers needed to hold a value of type \p Ty.
+///
+/// Scalable vectors are counted in units of RISCV::RVVBitsPerBlock, and
+/// fixed-length vectors in units of the minimum RVV vector size when RVV is
+/// used for them. Floating-point scalars are counted in FLEN-bit registers
+/// when F or D is enabled; everything else is counted in XLEN-bit registers.
+// NOTE(review): getPrimitiveSizeInBits() presumably yields 0 for pointer
+// types, which would make them cost 0 registers here -- confirm intent.
+InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) const {
+  TypeSize Size = Ty->getPrimitiveSizeInBits();
+  if (Ty->isVectorTy()) {
+    if (Size.isScalable())
+      return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
+    if (ST->useRVVForFixedLengthVectors())
+      return divideCeil(Size.getFixedValue(), ST->getMinRVVVectorSizeInBits());
+  }
+
+  // Every size that reaches this point is fixed: scalable vectors returned
+  // above.
+  uint64_t FixedSize = Size.getFixedValue();
+  unsigned FLen = ST->getFLen();
+  if (FLen && Ty->isFloatingPointTy())
+    return divideCeil(FixedSize, FLen);
+
+  return divideCeil(FixedSize, ST->getXLen());
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -0,0 +1,62 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
+; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \
+; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
+; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
+; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \
+; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
+; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
+; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \
+; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
+; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
+; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \
+; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
+
+define void @add(float* noalias nocapture readonly %src1, float* noalias nocapture readonly %src2, i32 signext %size, float* noalias nocapture writeonly %result) {
+; CHECK-LABEL: add
+; CHECK-LMUL1:      LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-LMUL2:      LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 4 registers
+; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 4 registers
+; CHECK-LMUL4:      LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 8 registers
+; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 8 registers
+; CHECK-LMUL8:      LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 16 registers
+; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 16 registers
+
+entry:
+  %conv = zext i32 %size to i64
+  %cmp10.not = icmp eq i32 %size, 0
+  br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %src1, i64 %i.011
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %src2, i64 %i.011
+  %1 = load float, float* %arrayidx2, align 4
+  %add = fadd float %0, %1
+  %arrayidx3 = getelementptr inbounds float, float* %result, i64 %i.011
+  store float %add, float* %arrayidx3, align 4
+  %add4 = add nuw nsw i64 %i.011, 1
+  %exitcond.not = icmp eq i64 %add4, %conv
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}