diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -261,6 +261,11 @@
 
   bool IsLittle;
 
+  /// Bounds, in bits, assumed for the SVE register size of this subtarget.
+  /// Zero means no bound is assumed.
+  unsigned MinSVEVectorSizeInBits;
+  unsigned MaxSVEVectorSizeInBits;
+
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -291,7 +296,8 @@
   /// of the specified triple.
   AArch64Subtarget(const Triple &TT, const std::string &CPU,
                    const std::string &FS, const TargetMachine &TM,
-                   bool LittleEndian);
+                   bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride,
+                   unsigned MaxSVEVectorSizeInBitsOverride);
 
   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,13 +47,13 @@
     cl::desc("Call nonlazybind functions via direct GOT load"),
     cl::init(false), cl::Hidden);
 
-static cl::opt<unsigned> SVEVectorBitsMax(
+static cl::opt<unsigned> SVEVectorBitsMaxOpt(
     "aarch64-sve-vector-bits-max",
     cl::desc("Assume SVE vector registers are at most this big, "
              "with zero meaning no maximum size is assumed."),
     cl::init(0), cl::Hidden);
 
-static cl::opt<unsigned> SVEVectorBitsMin(
+static cl::opt<unsigned> SVEVectorBitsMinOpt(
     "aarch64-sve-vector-bits-min",
     cl::desc("Assume SVE vector registers are at least this big, "
              "with zero meaning no minimum size is assumed."),
@@ -210,11 +210,19 @@
 
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                    const std::string &FS,
-                                   const TargetMachine &TM, bool LittleEndian)
+                                   const TargetMachine &TM, bool LittleEndian,
+                                   unsigned MinSVEVectorSizeInBitsOverride,
+                                   unsigned MaxSVEVectorSizeInBitsOverride)
     : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
       IsLittle(LittleEndian),
+      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride
+                                 ? MinSVEVectorSizeInBitsOverride
+                                 : SVEVectorBitsMinOpt),
+      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride
+                                 ? MaxSVEVectorSizeInBitsOverride
+                                 : SVEVectorBitsMaxOpt),
       TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
       TLInfo(TM, *this) {
@@ -358,24 +366,26 @@
 
 unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
   assert(HasSVE && "Tried to get SVE vector length without SVE support!");
-  assert(SVEVectorBitsMax % 128 == 0 &&
+  assert(MaxSVEVectorSizeInBits % 128 == 0 &&
          "SVE requires vector length in multiples of 128!");
-  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
+  assert((MaxSVEVectorSizeInBits >= MinSVEVectorSizeInBits ||
+          MaxSVEVectorSizeInBits == 0) &&
          "Minimum SVE vector size should not be larger than its maximum!");
-  if (SVEVectorBitsMax == 0)
+  if (MaxSVEVectorSizeInBits == 0)
     return 0;
-  return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
+  return (std::max(MinSVEVectorSizeInBits, MaxSVEVectorSizeInBits) / 128) * 128;
 }
 
 unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
   assert(HasSVE && "Tried to get SVE vector length without SVE support!");
-  assert(SVEVectorBitsMin % 128 == 0 &&
+  assert(MinSVEVectorSizeInBits % 128 == 0 &&
          "SVE requires vector length in multiples of 128!");
-  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
+  assert((MaxSVEVectorSizeInBits >= MinSVEVectorSizeInBits ||
+          MaxSVEVectorSizeInBits == 0) &&
          "Minimum SVE vector size should not be larger than its maximum!");
-  if (SVEVectorBitsMax == 0)
-    return (SVEVectorBitsMin / 128) * 128;
-  return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
+  if (MaxSVEVectorSizeInBits == 0)
+    return (MinSVEVectorSizeInBits / 128) * 128;
+  return (std::min(MinSVEVectorSizeInBits, MaxSVEVectorSizeInBits) / 128) * 128;
 }
 
 bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -349,14 +349,34 @@
   std::string FS =
       FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
 
-  auto &I = SubtargetMap[CPU + FS];
+  SmallString<512> Key;
+
+  unsigned MinSVEVectorSize = 0;
+  unsigned MaxSVEVectorSize = 0;
+  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
+  if (VScaleRangeAttr.isValid())
+    std::tie(MinSVEVectorSize, MaxSVEVectorSize) =
+        VScaleRangeAttr.getVScaleRangeArgs();
+
+  // Subtargets are cached per function configuration, so the vscale_range
+  // bounds must be part of the key. Append them as decimal text: a bare
+  // `Key += unsigned` would select operator+=(char) and add a single byte.
+  Key += "SVEMin";
+  Key += std::to_string(MinSVEVectorSize);
+  Key += "SVEMax";
+  Key += std::to_string(MaxSVEVectorSize);
+  Key += CPU;
+  Key += FS;
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
-                                            isLittle);
+                                            isLittle, MinSVEVectorSize * 128,
+                                            MaxSVEVectorSize * 128);
   }
   return I.get();
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @func_vscale_none(<16 x i32>* %a, <16 x i32>* %b) #0 {
+; CHECK-NOARG-LABEL: func_vscale_none:
+; CHECK-NOARG:       // %bb.0:
+; CHECK-NOARG-NEXT:    ldp q0, q1, [x0]
+; CHECK-NOARG-NEXT:    ldp q2, q3, [x1]
+; CHECK-NOARG-NEXT:    ldp q4, q5, [x0, #32]
+; CHECK-NOARG-NEXT:    ldp q7, q6, [x1, #32]
+; CHECK-NOARG-NEXT:    add v1.4s, v1.4s, v3.4s
+; CHECK-NOARG-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NOARG-NEXT:    add v2.4s, v5.4s, v6.4s
+; CHECK-NOARG-NEXT:    add v3.4s, v4.4s, v7.4s
+; CHECK-NOARG-NEXT:    stp q3, q2, [x0, #32]
+; CHECK-NOARG-NEXT:    stp q0, q1, [x0]
+; CHECK-NOARG-NEXT:    ret
+;
+; CHECK-ARG-LABEL: func_vscale_none:
+; CHECK-ARG:       // %bb.0:
+; CHECK-ARG-NEXT:    ptrue p0.s, vl16
+; CHECK-ARG-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-ARG-NEXT:    ld1w { z1.s }, p0/z, [x1]
+; CHECK-ARG-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-ARG-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-ARG-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @func_vscale1_1(<16 x i32>* %a, <16 x i32>* %b) #1 {
+; CHECK-LABEL: func_vscale1_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    ldp q4, q5, [x0, #32]
+; CHECK-NEXT:    ldp q7, q6, [x1, #32]
+; CHECK-NEXT:    add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v2.4s, v5.4s, v6.4s
+; CHECK-NEXT:    add v3.4s, v4.4s, v7.4s
+; CHECK-NEXT:    stp q3, q2, [x0, #32]
+; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 {
+; CHECK-LABEL: func_vscale2_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    add x8, x0, #32 // =32
+; CHECK-NEXT:    add x9, x1, #32 // =32
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:    add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 {
+; CHECK-LABEL: func_vscale2_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    add x8, x0, #32 // =32
+; CHECK-NEXT:    add x9, x1, #32 // =32
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:    add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @func_vscale4_4(<16 x i32>* %a, <16 x i32>* %b) #4 {
+; CHECK-LABEL: func_vscale4_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl16
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @func_vscale8_8(<16 x i32>* %a, <16 x i32>* %b) #5 {
+; CHECK-LABEL: func_vscale8_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl16
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
+attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
+attributes #4 = { "target-features"="+sve" vscale_range(4,4) }
+attributes #5 = { "target-features"="+sve" vscale_range(8,8) }
diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp
--- a/llvm/unittests/Target/AArch64/InstSizes.cpp
+++ b/llvm/unittests/Target/AArch64/InstSizes.cpp
@@ -30,7 +30,7 @@
 std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
   AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
                       std::string(TM->getTargetFeatureString()), *TM,
-                      /* isLittle */ false);
+                      /* isLittle */ false, 0, 0);
   return std::make_unique<AArch64InstrInfo>(ST);
 }
 