Index: llvm/include/llvm/CodeGen/TargetFrameLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -134,6 +134,12 @@ /// was called). virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + /// This method returns whether or not an object with the given stack id can + /// be bundled into the local area. + virtual bool bundleStackIdIntoLocalArea(unsigned StackId) const { + return true; + } + /// getOffsetOfLocalArea - This method returns the offset of the local area /// from the stack pointer on entrance to a function. /// Index: llvm/lib/CodeGen/LocalStackSlotAllocation.cpp =================================================================== --- llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -220,6 +220,8 @@ continue; if (StackProtectorFI == (int)i) continue; + if (!TFI.bundleStackIdIntoLocalArea(MFI.getStackID(i))) + continue; switch (MFI.getObjectSSPLayout(i)) { case MachineFrameInfo::SSPLK_None: @@ -254,6 +256,8 @@ continue; if (ProtectedObjs.count(i)) continue; + if (!TFI.bundleStackIdIntoLocalArea(MFI.getStackID(i))) + continue; AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); } Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -104,6 +104,13 @@ } } + bool bundleStackIdIntoLocalArea(unsigned StackId) const override { + // We don't support putting SVE objects into the pre-allocated local + // frame block at the moment. It's not trivial to support this and there + // has been no cost analysis to prove this is profitable. + return StackId != TargetStackID::SVEVector; + } + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, uint64_t StackBumpBytes) const; Index: llvm/test/CodeGen/AArch64/sve-localstackalloc.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-localstackalloc.mir @@ -0,0 +1,84 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -start-before=localstackalloc -debug-only=localstackalloc -o - %s 2>&1 | FileCheck %s + +--- | + ; ModuleID = '' + source_filename = "" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + define @insert_32i8_idx( %a, i8 %elt, i64 %idx) #0 { + %ins = insertelement %a, i8 %elt, i64 %idx + ret %ins + } + + attributes #0 = { "target-features"="+sve" } + +... +--- +name: insert_32i8_idx +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: zpr, preferred-register: '' } + - { id: 1, class: zpr, preferred-register: '' } + - { id: 2, class: gpr32, preferred-register: '' } + - { id: 3, class: gpr64, preferred-register: '' } + - { id: 5, class: ppr_3b, preferred-register: '' } + - { id: 6, class: gpr64sp, preferred-register: '' } + - { id: 7, class: zpr, preferred-register: '' } + - { id: 8, class: zpr, preferred-register: '' } +liveins: + - { reg: '$z0', virtual-reg: '%0' } + - { reg: '$z1', virtual-reg: '%1' } + - { reg: '$w0', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: +# CHECK-NOT: Allocate FI(0) to local offset + - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 16, + stack-id: sve-vec, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $z0, $z1, $w0 + + %2:gpr32 = COPY $w0 + %1:zpr = COPY $z1 + %0:zpr = COPY $z0 + %5:ppr_3b = PTRUE_B 31 + %6:gpr64sp = ADDXri %stack.0, 0, 0 + ST1B_IMM %1, %5, %6, 1 :: (store unknown-size, align 16) + ST1B_IMM %0, %5, %stack.0, 0 :: (store unknown-size into %stack.0, align 16) + %7:zpr = LD1B_IMM %5, %6, 1 :: (load unknown-size from %stack.0 + 16, align 16) + %8:zpr = LD1B_IMM %5, %stack.0, 0 :: (load unknown-size from %stack.0, align 16) + $z0 = COPY %8 + $z1 = COPY %7 + RET_ReallyLR implicit $z0, implicit $z1 + +...