diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -363,6 +363,11 @@ return true; } + /// Returns the StackID that scalable vectors should be associated with. + virtual TargetStackID::Value getStackIDForScalableVectors() const { + return TargetStackID::Default; + } + virtual bool isSupportedStackID(TargetStackID::Value ID) const { switch (ID) { default: diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -144,7 +144,8 @@ if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast(AI->getArraySize()); - uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + uint64_t TySize = + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -159,6 +160,12 @@ MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } + // Scalable vectors may need a special StackID to distinguish + // them from other (fixed size) stack objects. + if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + MF->getFrameInfo().setStackID(FrameIndex, + TFI->getStackIDForScalableVectors()); + StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. 
if (Iter != CatchObjects.end()) { diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -72,6 +72,7 @@ } bool enableStackSlotScavenging(const MachineFunction &MF) const override; + TargetStackID::Value getStackIDForScalableVectors() const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -206,6 +206,11 @@ return DefaultSafeSPDisplacement; } +TargetStackID::Value +AArch64FrameLowering::getStackIDForScalableVectors() const { + return TargetStackID::SVEVector; +} + /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); @@ -2488,11 +2493,12 @@ /// returns true if there are any SVE callee saves. static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max) { + Min = std::numeric_limits::max(); + Max = std::numeric_limits::min(); + if (!MFI.isCalleeSavedInfoValid()) return false; - Min = std::numeric_limits::max(); - Max = std::numeric_limits::min(); const std::vector &CSI = MFI.getCalleeSavedInfo(); for (auto &CS : CSI) { if (AArch64::ZPRRegClass.contains(CS.getReg()) || @@ -2526,6 +2532,11 @@ Offset = FixedOffset; } + auto Assign = [&MFI](int FI, int64_t Offset) { + LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); + MFI.setObjectOffset(FI, Offset); + }; + // Then process all callee saved slots. if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { // Make sure to align the last callee save slot. 
@@ -2535,17 +2546,40 @@ for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { Offset += MFI.getObjectSize(I); Offset = alignTo(Offset, MFI.getObjectAlignment(I)); - if (AssignOffsets) { - LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset - << "]\n"); - MFI.setObjectOffset(I, -Offset); - } + if (AssignOffsets) + Assign(I, -Offset); } } - // Note: We don't take allocatable stack objects into - // account yet, because allocation for those is not yet - // implemented. + // Collect the live SVE objects that are not callee saves; these still need offsets. + SmallVector ObjectsToAllocate; + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { + unsigned StackID = MFI.getStackID(I); + if (StackID != TargetStackID::SVEVector) + continue; + if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) + continue; + if (MFI.isDeadObjectIndex(I)) + continue; + + ObjectsToAllocate.push_back(I); + } + + // Allocate all SVE locals and spills + for (unsigned FI : ObjectsToAllocate) { + unsigned Align = MFI.getObjectAlignment(FI); + // FIXME: Given that the length of SVE vectors is not necessarily a power of + // two, we'd need to align every object dynamically at runtime if the + // alignment is larger than 16. This is not yet supported. 
+ if (Align > 16) + report_fatal_error( + "Alignment of scalable vectors > 16 bytes is not yet supported"); + + Offset = alignTo(Offset + MFI.getObjectSize(FI), Align); + if (AssignOffsets) + Assign(FI, -Offset); + } + return Offset; } diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -34,6 +34,7 @@ define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable } define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable } define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable } + define aarch64_sve_vector_pcs void @frame_layout() nounwind { entry: unreachable } ... # +----------+ @@ -512,3 +513,69 @@ RET_ReallyLR --- +# Frame layout should be: +# +---------------------+ <- Old SP +# | callee save z8 |@ -16 +# | callee save z23 |@ -32 +# | callee save p4 |@ -34 +# | callee save p15 |@ -48 +# | id #0 (size 32) |@ -80 +# | id #1 (size 4) |@ -84 +# | id #2 (size 16) |@ -112 +# | id #3 (size 2) |@ -114 +# | id #4 (size 16) |@ -144 +# | id #5 (size 2) |@ -146 +# +- - - - - - - - - - -+ <- New SP @-160 +# CHECK-LABEL: name: frame_layout +# CHECK: stack: +# CHECK: - { id: 0, name: '', type: default, offset: -80, size: 32, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 1, name: '', type: default, offset: -84, size: 4, alignment: 2, +# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 2, name: '', type: default, offset: -112, size: 16, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 3, name: '', type: default, offset: -114, size: 2, alignment: 2, +# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 4, name: '', type: spill-slot, offset: -144, size: 16, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 5, name: '', type: spill-slot, offset: -146, size: 2, alignment: 2, 
+# CHECK-NEXT: stack-id: sve-vec, +# CHECK: - { id: 6, name: '', type: spill-slot, offset: -16, size: 16, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, callee-saved-register: '$z8', +# CHECK: - { id: 7, name: '', type: spill-slot, offset: -32, size: 16, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, callee-saved-register: '$z23', +# CHECK: - { id: 8, name: '', type: spill-slot, offset: -34, size: 2, alignment: 2, +# CHECK-NEXT: stack-id: sve-vec, callee-saved-register: '$p4', +# CHECK: - { id: 9, name: '', type: spill-slot, offset: -48, size: 2, alignment: 16, +# CHECK-NEXT: stack-id: sve-vec, callee-saved-register: '$p15', +# CHECK: - { id: 10, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, +# CHECK-NEXT: stack-id: default, callee-saved-register: '$fp', +# +# CHECK: bb.0.entry: +# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 +# CHECK-NEXT: STR_PXI killed $p15, $sp, 6 +# CHECK-NEXT: STR_PXI killed $p4, $sp, 7 +# CHECK-NEXT: STR_ZXI killed $z23, $sp, 1 +# CHECK-NEXT: STR_ZXI killed $z8, $sp, 2 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -7 +name: frame_layout +stack: + - { id: 0, type: default, size: 32, alignment: 16, stack-id: sve-vec } + - { id: 1, type: default, size: 4, alignment: 2, stack-id: sve-vec } + - { id: 2, type: default, size: 16, alignment: 16, stack-id: sve-vec } + - { id: 3, type: default, size: 2, alignment: 2, stack-id: sve-vec } + - { id: 4, type: spill-slot, size: 16, alignment: 16, stack-id: sve-vec } + - { id: 5, type: spill-slot, size: 2, alignment: 2, stack-id: sve-vec } +body: | + bb.0.entry: + + ; Trigger some callee saves + $z8 = IMPLICIT_DEF + $z23 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + RET_ReallyLR + +--- diff --git a/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll @@ 
-0,0 +1,17 @@ +; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefix=CHECKCG +; RUN: llc -mtriple=aarch64 -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECKISEL + +; CHECKCG-LABEL: foo: +; CHECKCG: addvl sp, sp, #-1 + +; CHECKISEL-LABEL: name: foo +; CHECKISEL: stack: +; CHECKISEL: id: 0, name: ptr, type: default, offset: 0, size: 16, alignment: 16, +; CHECKISEL-NEXT: stack-id: sve-vec +define i32 @foo( %val) { + %ptr = alloca + %res = call i32 @bar(* %ptr) + ret i32 %res +} + +declare i32 @bar(* %ptr);