diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -96,7 +96,14 @@
                             DiagnosticsEngine &Diags) override;
 
   bool hasBitIntType() const override { return true; }
+
+  /// Returns the (minimum, maximum) vscale implied by the enabled vector
+  /// extensions, or None when the minimum VLEN they guarantee is below
+  /// llvm::RISCV::RVVVLENBitsMin.
+  Optional<std::pair<unsigned, unsigned>>
+  getVScaleRange(const LangOptions &LangOpts) const override;
 };
+
 class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
 public:
   RISCV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -277,6 +277,20 @@
   return true;
 }
 
+Optional<std::pair<unsigned, unsigned>>
+RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+  // Below RVVVLENBitsMin there is no vscale range to report. LangOpts is not
+  // consulted; the answer depends only on the ISA string.
+  if (ISAInfo->getMinVLen() < llvm::RISCV::RVVVLENBitsMin)
+    return None;
+  // RVV's vscale is defined as VLEN/RVVBitsPerBlock. The lower bound comes
+  // from the minimum VLEN of the enabled extensions, the upper bound from
+  // the largest VLEN LLVM supports (RVVVLENBitsMax).
+  return std::make_pair(ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock,
+                        llvm::RISCV::RVVVLENBitsMax /
+                            llvm::RISCV::RVVBitsPerBlock);
+}
+
 bool RISCV32TargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::RISCV::checkCPUKind(llvm::RISCV::parseCPUKind(Name),
                                    /*Is64Bit=*/false);
diff --git a/clang/test/CodeGen/RISCV/riscv-vscale-range.c b/clang/test/CodeGen/RISCV/riscv-vscale-range.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-vscale-range.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple riscv64 -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-NO-V
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -target-feature +zvl32b -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL32b
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -target-feature +zvl64b -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL64b
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefixes
CHECK,CHECK-V +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl256b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL256b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl512b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL512b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl1024b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL1024b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl2048b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL2048b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl4096b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL4096b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl8192b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL8192b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl16384b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL16384b +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvl32768b -S -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ZVL32768b + +// CHECK-LABEL: @func() #0 +// CHECK-NO-V-NOT: vscale_range +// CHECK-ZVL32b-NOT: vscale_range +// CHECK-ZVL64b: attributes #0 = { {{.*}} vscale_range(1,1024) {{.*}} "target-features"="+64bit,+zve32x,+zve64x,+zvl32b,+zvl64b" +// CHECK-V: attributes #0 = { {{.*}} vscale_range(2,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl128b,+zvl32b,+zvl64b" +// CHECK-ZVL256b: attributes #0 = { {{.*}} vscale_range(4,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl128b,+zvl256b,+zvl32b,+zvl64b" +// CHECK-ZVL512b: attributes #0 = { {{.*}} vscale_range(8,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl128b,+zvl256b,+zvl32b,+zvl512b,+zvl64b" 
+// CHECK-ZVL1024b: attributes #0 = { {{.*}} vscale_range(16,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl256b,+zvl32b,+zvl512b,+zvl64b" +// CHECK-ZVL2048b: attributes #0 = { {{.*}} vscale_range(32,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl2048b,+zvl256b,+zvl32b,+zvl512b,+zvl64b" +// CHECK-ZVL4096b: attributes #0 = { {{.*}} vscale_range(64,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl2048b,+zvl256b,+zvl32b,+zvl4096b,+zvl512b,+zvl64b" +// CHECK-ZVL8192b: attributes #0 = { {{.*}} vscale_range(128,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl2048b,+zvl256b,+zvl32b,+zvl4096b,+zvl512b,+zvl64b,+zvl8192b" +// CHECK-ZVL16384b: attributes #0 = { {{.*}} vscale_range(256,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl16384b,+zvl2048b,+zvl256b,+zvl32b,+zvl4096b,+zvl512b,+zvl64b,+zvl8192b" +// CHECK-ZVL32768b: attributes #0 = { {{.*}} vscale_range(512,1024) {{.*}} "target-features"="+64bit,+d,+f,+v,+zvl1024b,+zvl128b,+zvl16384b,+zvl2048b,+zvl256b,+zvl32768b,+zvl32b,+zvl4096b,+zvl512b,+zvl64b,+zvl8192b" +void func() {} diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -172,6 +172,14 @@ StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64); StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo); +// We use 64 bits as the known part in the scalable vector types. +static constexpr unsigned RVVBitsPerBlock = 64; +// The specification-defined lower- and upper-bounds on VLEN supported by LLVM, +// in bits. Note that Zvl* extensions may raise the minimum. +// FIXME: The absolute minimum is technically 32 but we don't support it. 
+static constexpr unsigned RVVVLENBitsMin = 64;
+static constexpr unsigned RVVVLENBitsMax = 65536;
+
 } // namespace RISCV
 
 namespace ARM {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -666,11 +666,6 @@
 };
 };
 
-namespace RISCV {
-// We use 64 bits as the known part in the scalable vector types.
-static constexpr unsigned RVVBitsPerBlock = 64;
-} // namespace RISCV
-
 namespace RISCVVIntrinsicsTable {
 
 struct RISCVVIntrinsicInfo {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -104,6 +104,8 @@
   unsigned XLen = 32;
   ExtZvl ZvlLen = ExtZvl::NotSet;
   MVT XLenVT = MVT::i32;
+  unsigned RVVVectorBitsMin; // Known minimum VLEN in bits; 0 if unknown.
+  unsigned RVVVectorBitsMax; // Known maximum VLEN in bits; 0 if unknown.
   uint8_t MaxInterleaveFactor = 2;
   RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
   BitVector UserReservedRegister;
@@ -124,7 +126,8 @@
 public:
   // Initializes the data members to match that of the specified triple.
   RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
-                 StringRef FS, StringRef ABIName, const TargetMachine &TM);
+                 StringRef FS, StringRef ABIName, unsigned RVVVectorBitsMin,
+                 unsigned RVVVectorBitsMax, const TargetMachine &TM);
 
   // Parses features string setting specified subtarget options. The
   // definition of this function is auto-generated by tblgen.
@@ -243,8 +246,26 @@
   // Return the known range for the bit length of RVV data registers. A value
   // of 0 means nothing is known about that particular limit beyond what's
   // implied by the architecture.
-  unsigned getMaxRVVVectorSizeInBits() const;
-  unsigned getMinRVVVectorSizeInBits() const;
+  unsigned getMaxRVVVectorSizeInBits() const {
+    assert(hasVInstructions() &&
+           "Tried to get vector length without Zve or V extension support!");
+    // ZvlLen specifies the minimum required vlen. The upper bound provided by
+    // riscv-v-vector-bits-max should be no less than it.
+    if (RVVVectorBitsMax != 0 && RVVVectorBitsMax < ZvlLen)
+      report_fatal_error("riscv-v-vector-bits-max specified is lower "
+                         "than the Zvl*b limitation");
+    return RVVVectorBitsMax;
+  }
+  unsigned getMinRVVVectorSizeInBits() const {
+    assert(hasVInstructions() &&
+           "Tried to get vector length without Zve or V extension support!");
+    // ZvlLen specifies the minimum required vlen. The lower bound provided by
+    // riscv-v-vector-bits-min should be no less than it.
+    if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
+      report_fatal_error("riscv-v-vector-bits-min specified is lower "
+                         "than the Zvl*b limitation");
+    return RVVVectorBitsMin;
+  }
   unsigned getMaxLMULForFixedLengthVectors() const;
   unsigned getMaxELENForFixedLengthVectors() const;
   bool useRVVForFixedLengthVectors() const;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -28,18 +28,6 @@
 #define GET_SUBTARGETINFO_CTOR
 #include "RISCVGenSubtargetInfo.inc"
 
-static cl::opt<unsigned> RVVVectorBitsMax(
-    "riscv-v-vector-bits-max",
-    cl::desc("Assume V extension vector registers are at most this big, "
-             "with zero meaning no maximum size is assumed."),
-    cl::init(0), cl::Hidden);
-
-static cl::opt<unsigned> RVVVectorBitsMin(
-    "riscv-v-vector-bits-min",
-    cl::desc("Assume V extension vector registers are at least this big, "
-             "with zero meaning no minimum size is assumed."),
-    cl::init(0), cl::Hidden);
-
 static cl::opt<unsigned> RVVVectorLMULMax(
     "riscv-v-fixed-length-vector-lmul-max",
     cl::desc("The maximum LMUL value to use for fixed length vectors. "
@@ -91,10 +79,14 @@
 
 RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
                                StringRef TuneCPU, StringRef FS,
-                               StringRef ABIName, const TargetMachine &TM)
+                               StringRef ABIName, unsigned RVVVectorBitsMin,
+                               unsigned RVVVectorBitsMax,
+                               const TargetMachine &TM)
     : RISCVGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+      RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
       UserReservedRegister(RISCV::NUM_TARGET_REGS),
-      FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
+      FrameLowering(
+          initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
       InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
   CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering()));
   Legalizer.reset(new RISCVLegalizerInfo(*this));
@@ -136,56 +128,6 @@
              : std::max<unsigned>(2, RISCVMaxBuildIntsCost);
 }
 
-unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
-  assert(hasVInstructions() &&
-         "Tried to get vector length without Zve or V extension support!");
-  if (RVVVectorBitsMax == 0)
-    return 0;
-
-  // ZvlLen specifies the minimum required vlen. The upper bound provided by
-  // riscv-v-vector-bits-max should be no less than it.
-  if (RVVVectorBitsMax < ZvlLen)
-    report_fatal_error("riscv-v-vector-bits-max specified is lower "
-                       "than the Zvl*b limitation");
-
-  // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert(
-      RVVVectorBitsMax >= 64 && RVVVectorBitsMax <= 65536 &&
-      isPowerOf2_32(RVVVectorBitsMax) &&
-      "V or Zve* extension requires vector length to be in the range of 64 to "
-      "65536 and a power of 2!");
-  assert(RVVVectorBitsMax >= RVVVectorBitsMin &&
-         "Minimum V extension vector length should not be larger than its "
-         "maximum!");
-  unsigned Max = std::max(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Max < 64 || Max > 65536) ? 0 : Max);
-}
-
-unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
-  // ZvlLen specifies the minimum required vlen. The lower bound provided by
-  // riscv-v-vector-bits-min should be no less than it.
-  if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
-    report_fatal_error("riscv-v-vector-bits-min specified is lower "
-                       "than the Zvl*b limitation");
-
-  assert(hasVInstructions() &&
-         "Tried to get vector length without Zve or V extension support!");
-  // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert(
-      (RVVVectorBitsMin == 0 ||
-       (RVVVectorBitsMin >= 64 && RVVVectorBitsMin <= 65536 &&
-        isPowerOf2_32(RVVVectorBitsMin))) &&
-      "V or Zve* extension requires vector length to be in the range of 64 to "
-      "65536 and a power of 2!");
-  assert((RVVVectorBitsMax >= RVVVectorBitsMin || RVVVectorBitsMax == 0) &&
-         "Minimum V extension vector length should not be larger than its "
-         "maximum!");
-  unsigned Min = RVVVectorBitsMin;
-  if (RVVVectorBitsMax != 0)
-    Min = std::min(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Min < 64 || Min > 65536) ? 0 : Min);
-}
-
 unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
   assert(hasVInstructions() &&
          "Tried to get vector length without Zve or V extension support!");
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -32,6 +32,18 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
+static cl::opt<unsigned> RVVVectorBitsMaxOpt(
+    "riscv-v-vector-bits-max",
+    cl::desc("Assume V extension vector registers are at most this big, "
+             "with zero meaning no maximum size is assumed."),
+    cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> RVVVectorBitsMinOpt(
+    "riscv-v-vector-bits-min",
+    cl::desc("Assume V extension vector registers are at least this big, "
+             "with zero meaning no minimum size is assumed."),
+    cl::init(0), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine Y(getTheRISCV64Target()); @@ -80,13 +92,63 @@ Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); + unsigned RVVBitsMin = 0; + unsigned RVVBitsMax = 0; + Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange); + if (VScaleRangeAttr.isValid()) { + RVVBitsMin = VScaleRangeAttr.getVScaleRangeMin() * RISCV::RVVBitsPerBlock; + if (VScaleRangeAttr.getVScaleRangeMax().hasValue()) + RVVBitsMax = + *VScaleRangeAttr.getVScaleRangeMax() * RISCV::RVVBitsPerBlock; + else + RVVBitsMax = RISCV::RVVVLENBitsMax; + } + // Allow user options to override these. + if (RVVVectorBitsMinOpt) + RVVBitsMin = RVVVectorBitsMinOpt; + if (RVVVectorBitsMaxOpt) + RVVBitsMax = RVVVectorBitsMaxOpt; + + assert((RVVBitsMin == 0 || isPowerOf2_32(RVVBitsMin)) && + "RVV requires vector length to be a power of two!"); + assert((RVVBitsMax == 0 || isPowerOf2_32(RVVBitsMax)) && + "RVV requires vector length to be a power of two!"); + assert((RVVBitsMin == 0 || RVVBitsMin >= RISCV::RVVVLENBitsMin) && + "RVV vector size must be no smaller than the minimum allowed by the " + "specification!"); + assert(RVVBitsMax <= RISCV::RVVVLENBitsMax && + "RVV vector size must be no larger than the maximum allowed by the " + "specification!"); + assert((RVVBitsMax == 0 || RVVBitsMax >= RVVBitsMin) && + "Minimum RVV vector size should not be larger than its maximum!"); + + // Sanitize user input in case of no asserts. + if (RVVBitsMax != 0) + RVVBitsMin = std::min(RVVBitsMin, RVVBitsMax); + RVVBitsMin = PowerOf2Floor( + (RVVBitsMin < RISCV::RVVVLENBitsMin || RVVBitsMin > RISCV::RVVVLENBitsMax) + ? 0 + : RVVBitsMin); + + RVVBitsMax = std::max(RVVBitsMin, RVVBitsMax); + RVVBitsMax = PowerOf2Floor( + (RVVBitsMax < RISCV::RVVVLENBitsMin || RVVBitsMax > RISCV::RVVVLENBitsMax) + ? 0 + : RVVBitsMax); + std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; std::string TuneCPU = TuneAttr.isValid() ? 
TuneAttr.getValueAsString().str() : CPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string Key = CPU + TuneCPU + FS; + Key += "RVVMin"; + Key += std::to_string(RVVBitsMin); + Key += "RVVMax"; + Key += std::to_string(RVVBitsMax); + auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any @@ -103,7 +165,8 @@ } ABIName = ModuleTargetABI->getString(); } - I = std::make_unique(TargetTriple, CPU, TuneCPU, FS, ABIName, *this); + I = std::make_unique( + TargetTriple, CPU, TuneCPU, FS, ABIName, RVVBitsMin, RVVBitsMax, *this); } return I.get(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -22,6 +22,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/IR/Function.h" +#include "llvm/Support/TargetParser.h" namespace llvm { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s + +define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { +; CHECK-LABEL: vadd_v512i8_zvl128: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi 
a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: addi a2, a3, 128 +; CHECK-NEXT: addi a4, a3, 384 +; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a3, 256 +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v16, (a4) +; CHECK-NEXT: vle8.v v24, (a2) +; CHECK-NEXT: vle8.v v0, (a3) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v8, v0, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v16, v16, v8 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli 
a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8re8.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v0, v24, v0 +; CHECK-NEXT: vse8.v v0, (a0) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vse8.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vse8.v v16, (a1) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +define <512 x i8> @vadd_v512i8_zvl256(<512 x i8> %a, <512 x i8> %b) #1 { +; CHECK-LABEL: vadd_v512i8_zvl256: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: li a2, 256 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: vle8.v v0, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +define <512 x i8> @vadd_v512i8_zvl512(<512 x i8> %a, <512 x i8> %b) #2 { +; CHECK-LABEL: vadd_v512i8_zvl512: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +define <512 x i8> @vadd_v512i8_zvl1024(<512 x i8> %a, <512 x i8> %b) #3 { +; CHECK-LABEL: vadd_v512i8_zvl1024: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +define <512 x i8> @vadd_v512i8_zvl2048(<512 x 
i8> %a, <512 x i8> %b) #4 { +; CHECK-LABEL: vadd_v512i8_zvl2048: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +define <512 x i8> @vadd_v512i8_zvl4096(<512 x i8> %a, <512 x i8> %b) #5 { +; CHECK-LABEL: vadd_v512i8_zvl4096: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %c = add <512 x i8> %a, %b + ret <512 x i8> %c +} + +attributes #0 = { vscale_range(2,1024) } +attributes #1 = { vscale_range(4,1024) } +attributes #2 = { vscale_range(8,1024) } +attributes #3 = { vscale_range(16,1024) } +attributes #4 = { vscale_range(32,1024) } +attributes #5 = { vscale_range(64,1024) }