[RISCV] Derive the RVV vector-length bounds from the vscale_range attribute

What this patch does:
  * Moves the hidden -riscv-v-vector-bits-min / -riscv-v-vector-bits-max
    options from RISCVSubtarget.cpp to RISCVTargetMachine.cpp.
  * During subtarget lookup, a bound that was not given on the command line
    is taken from the function's vscale_range attribute (scaled by
    RISCV::RVVBitsPerBlock). Both bounds are validated, normalized and made
    part of the subtarget cache key.
  * Passes the resolved bounds to the RISCVSubtarget constructor, which
    stores them; the size getters return the stored values after the Zvl*b
    consistency check.
  * Adds fixed-vectors-vscale-range.ll and adjusts two existing tests.

Review fixes folded into this revision:
  * The constructor declaration now names its seventh parameter
    RVVVectorBitsMax, matching the definition and the value passed by the
    caller.
  * The min/max normalization captures the smaller bound before overwriting
    either variable, so an inverted pair is really ordered when asserts are
    compiled out.
  * The assertion text reads "a power of 2" again.

NOTE(review): template arguments and IR vector types on context lines, the
indentation of context lines and the position of blank context lines were
reconstructed; verify them against the tree before applying.

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -51,6 +51,8 @@
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
+  unsigned RVVVectorBitsMin;
+  unsigned RVVVectorBitsMax;
   uint8_t MaxInterleaveFactor = 2;
   RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
   std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
@@ -71,7 +73,8 @@
 public:
   // Initializes the data members to match that of the specified triple.
   RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
-                 StringRef FS, StringRef ABIName, const TargetMachine &TM);
+                 StringRef FS, StringRef ABIName, unsigned RVVVectorBitsMin,
+                 unsigned RVVVectorBitsMax, const TargetMachine &TM);
 
   // Parses features string setting specified subtarget options. The
   // definition of this function is auto-generated by tblgen.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -32,20 +32,6 @@
 static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
                                           cl::init(false), cl::Hidden);
 
-static cl::opt<int> RVVVectorBitsMax(
-    "riscv-v-vector-bits-max",
-    cl::desc("Assume V extension vector registers are at most this big, "
-             "with zero meaning no maximum size is assumed."),
-    cl::init(0), cl::Hidden);
-
-static cl::opt<int> RVVVectorBitsMin(
-    "riscv-v-vector-bits-min",
-    cl::desc("Assume V extension vector registers are at least this big, "
-             "with zero meaning no minimum size is assumed. A value of -1 "
-             "means use Zvl*b extension. This is primarily used to enable "
-             "autovectorization with fixed width vectors."),
-    cl::init(-1), cl::Hidden);
-
 static cl::opt<unsigned> RVVVectorLMULMax(
     "riscv-v-fixed-length-vector-lmul-max",
     cl::desc("The maximum LMUL value to use for fixed length vectors. "
@@ -89,8 +75,11 @@
 
 RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
                                StringRef TuneCPU, StringRef FS,
-                               StringRef ABIName, const TargetMachine &TM)
+                               StringRef ABIName, unsigned RVVVectorBitsMin,
+                               unsigned RVVVectorBitsMax,
+                               const TargetMachine &TM)
     : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+      RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
       FrameLowering(
           initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
       InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
@@ -137,55 +126,30 @@
 unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
   assert(hasVInstructions() &&
          "Tried to get vector length without Zve or V extension support!");
-  if (RVVVectorBitsMax == 0)
-    return 0;
 
   // ZvlLen specifies the minimum required vlen. The upper bound provided by
   // riscv-v-vector-bits-max should be no less than it.
-  if (RVVVectorBitsMax < (int)ZvlLen)
+  if (RVVVectorBitsMax != 0 && RVVVectorBitsMax < ZvlLen)
     report_fatal_error("riscv-v-vector-bits-max specified is lower "
                        "than the Zvl*b limitation");
 
-  // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert(
-      RVVVectorBitsMax >= 64 && RVVVectorBitsMax <= 65536 &&
-      isPowerOf2_32(RVVVectorBitsMax) &&
-      "V or Zve* extension requires vector length to be in the range of 64 to "
-      "65536 and a power of 2!");
-  assert(RVVVectorBitsMax >= RVVVectorBitsMin &&
-         "Minimum V extension vector length should not be larger than its "
-         "maximum!");
-  unsigned Max = std::max(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Max < 64 || Max > 65536) ? 0 : Max);
+  return RVVVectorBitsMax;
 }
 
 unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
   assert(hasVInstructions() &&
          "Tried to get vector length without Zve or V extension support!");
 
-  if (RVVVectorBitsMin == -1)
+  if (RVVVectorBitsMin == -1U)
     return ZvlLen;
 
   // ZvlLen specifies the minimum required vlen. The lower bound provided by
   // riscv-v-vector-bits-min should be no less than it.
-  if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < (int)ZvlLen)
+  if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
     report_fatal_error("riscv-v-vector-bits-min specified is lower "
                        "than the Zvl*b limitation");
 
-  // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert(
-      (RVVVectorBitsMin == 0 ||
-       (RVVVectorBitsMin >= 64 && RVVVectorBitsMin <= 65536 &&
-        isPowerOf2_32(RVVVectorBitsMin))) &&
-      "V or Zve* extension requires vector length to be in the range of 64 to "
-      "65536 and a power of 2!");
-  assert((RVVVectorBitsMax >= RVVVectorBitsMin || RVVVectorBitsMax == 0) &&
-         "Minimum V extension vector length should not be larger than its "
-         "maximum!");
-  unsigned Min = RVVVectorBitsMin;
-  if (RVVVectorBitsMax != 0)
-    Min = std::min(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Min < 64 || Min > 65536) ? 0 : Min);
+  return RVVVectorBitsMin;
 }
 
 unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -53,6 +53,20 @@
     cl::desc("Enable the machine combiner pass"),
     cl::init(true), cl::Hidden);
 
+static cl::opt<int> RVVVectorBitsMaxOpt(
+    "riscv-v-vector-bits-max",
+    cl::desc("Assume V extension vector registers are at most this big, "
+             "with zero meaning no maximum size is assumed."),
+    cl::init(0), cl::Hidden);
+
+static cl::opt<int> RVVVectorBitsMinOpt(
+    "riscv-v-vector-bits-min",
+    cl::desc("Assume V extension vector registers are at least this big, "
+             "with zero meaning no minimum size is assumed. A value of -1 "
+             "means use Zvl*b extension. This is primarily used to enable "
+             "autovectorization with fixed width vectors."),
+    cl::init(-1), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -109,7 +123,58 @@
       TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
   std::string FS =
       FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
-  std::string Key = CPU + TuneCPU + FS;
+
+  unsigned RVVBitsMin = RVVVectorBitsMinOpt;
+  unsigned RVVBitsMax = RVVVectorBitsMaxOpt;
+  // A bound given explicitly on the command line takes precedence; otherwise
+  // derive it from the function's vscale_range attribute when present.
+  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
+  if (VScaleRangeAttr.isValid()) {
+    if (!RVVVectorBitsMinOpt.getNumOccurrences())
+      RVVBitsMin = VScaleRangeAttr.getVScaleRangeMin() * RISCV::RVVBitsPerBlock;
+    std::optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
+    if (VScaleMax.has_value() && !RVVVectorBitsMaxOpt.getNumOccurrences())
+      RVVBitsMax = *VScaleMax * RISCV::RVVBitsPerBlock;
+  }
+
+  if (RVVBitsMin != -1U) {
+    // FIXME: Change to >= 32 when VLEN = 32 is supported.
+    assert((RVVBitsMin == 0 || (RVVBitsMin >= 64 && RVVBitsMin <= 65536 &&
+                                isPowerOf2_32(RVVBitsMin))) &&
+           "V or Zve* extension requires vector length to be in the range of "
+           "64 to 65536 and a power of 2!");
+    assert((RVVBitsMax >= RVVBitsMin || RVVBitsMax == 0) &&
+           "Minimum V extension vector length should not be larger than its "
+           "maximum!");
+  }
+  assert((RVVBitsMax == 0 || (RVVBitsMax >= 64 && RVVBitsMax <= 65536 &&
+                              isPowerOf2_32(RVVBitsMax))) &&
+         "V or Zve* extension requires vector length to be in the range of "
+         "64 to 65536 and a power of 2!");
+
+  if (RVVBitsMin != -1U) {
+    if (RVVBitsMax != 0) {
+      // Order the bounds. Capture the smaller value before overwriting either
+      // variable; otherwise std::max would see the already-clamped minimum.
+      const unsigned LowerBits = std::min(RVVBitsMin, RVVBitsMax);
+      RVVBitsMax = std::max(RVVBitsMin, RVVBitsMax);
+      RVVBitsMin = LowerBits;
+    }
+
+    RVVBitsMin =
+        PowerOf2Floor((RVVBitsMin < 64 || RVVBitsMin > 65536) ? 0 : RVVBitsMin);
+  }
+  RVVBitsMax =
+      PowerOf2Floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
+
+  SmallString<512> Key;
+  Key += "RVVMin";
+  Key += std::to_string(RVVBitsMin);
+  Key += "RVVMax";
+  Key += std::to_string(RVVBitsMax);
+  Key += CPU;
+  Key += TuneCPU;
+  Key += FS;
   auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
@@ -126,7 +191,8 @@
       }
       ABIName = ModuleTargetABI->getString();
     }
-    I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, TuneCPU, FS, ABIName, *this);
+    I = std::make_unique<RISCVSubtarget>(
+        TargetTriple, CPU, TuneCPU, FS, ABIName, RVVBitsMin, RVVBitsMax, *this);
   }
   return I.get();
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
+
+define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 {
+; CHECK-LABEL: vadd_v512i8_zvl128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    li a4, 40
+; CHECK-NEXT:    mul a2, a2, a4
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    li a4, 24
+; CHECK-NEXT:    mul a2, a2, a4
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 5
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    addi a2, a3, 128
+; CHECK-NEXT:    addi a4, a3, 384
+; CHECK-NEXT:    vle8.v v8, (a4)
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a4, a1, 128
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a3, 256
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vle8.v v16, (a4)
+; CHECK-NEXT:    vle8.v v24, (a2)
+; CHECK-NEXT:    vle8.v v0, (a3)
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v8, v0, v8
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v16, v16, v8
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 24
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v0, v24, v0
+; CHECK-NEXT:    vse8.v v0, (a0)
+; CHECK-NEXT:    addi a1, a0, 384
+; CHECK-NEXT:    vse8.v v16, (a1)
+; CHECK-NEXT:    addi a1, a0, 256
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT:    vse8.v v16, (a1)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 40
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+define <512 x i8> @vadd_v512i8_zvl256(<512 x i8> %a, <512 x i8> %b) #1 {
+; CHECK-LABEL: vadd_v512i8_zvl256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 256
+; CHECK-NEXT:    li a2, 256
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v24, (a0)
+; CHECK-NEXT:    vle8.v v0, (a1)
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vadd.vv v16, v16, v0
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+define <512 x i8> @vadd_v512i8_zvl512(<512 x i8> %a, <512 x i8> %b) #2 {
+; CHECK-LABEL: vadd_v512i8_zvl512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+define <512 x i8> @vadd_v512i8_zvl1024(<512 x i8> %a, <512 x i8> %b) #3 {
+; CHECK-LABEL: vadd_v512i8_zvl1024:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+define <512 x i8> @vadd_v512i8_zvl2048(<512 x i8> %a, <512 x i8> %b) #4 {
+; CHECK-LABEL: vadd_v512i8_zvl2048:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+define <512 x i8> @vadd_v512i8_zvl4096(<512 x i8> %a, <512 x i8> %b) #5 {
+; CHECK-LABEL: vadd_v512i8_zvl4096:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 512
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %c = add <512 x i8> %a, %b
+  ret <512 x i8> %c
+}
+
+attributes #0 = { vscale_range(2,1024) }
+attributes #1 = { vscale_range(4,1024) }
+attributes #2 = { vscale_range(8,1024) }
+attributes #3 = { vscale_range(16,1024) }
+attributes #4 = { vscale_range(32,1024) }
+attributes #5 = { vscale_range(64,1024) }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -2381,5 +2381,5 @@
   ret <vscale x 8 x double> %res
 }
 
-attributes #0 = { vscale_range(2,2) }
+attributes #0 = { vscale_range(2, 0) }
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=riscv64 -mattr=+v -passes=loop-vectorize < %s | FileCheck %s
+; RUN: opt -S -mtriple=riscv64 -mattr=+zve32x -passes=loop-vectorize < %s | FileCheck %s
 
 define void @small_trip_count_min_vlen_128(i32* nocapture %a) nounwind vscale_range(4,1024) {
 ; CHECK-LABEL: @small_trip_count_min_vlen_128(