Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -359,6 +359,8 @@
   LegalizeResult lowerUnmergeValues(MachineInstr &MI);
   LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
   LegalizeResult lowerShuffleVector(MachineInstr &MI);
+  Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize,
+                                     Align Alignment, LLT PtrTy);
   LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
   LegalizeResult lowerExtract(MachineInstr &MI);
   LegalizeResult lowerInsert(MachineInstr &MI);
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5801,21 +5801,12 @@
   return Legalized;
 }
 
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
-  const auto &MF = *MI.getMF();
-  const auto &TFI = *MF.getSubtarget().getFrameLowering();
-  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
-    return UnableToLegalize;
-
-  Register Dst = MI.getOperand(0).getReg();
-  Register AllocSize = MI.getOperand(1).getReg();
-  Align Alignment = assumeAligned(MI.getOperand(2).getImm());
-
-  LLT PtrTy = MRI.getType(Dst);
+Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
+                                                    Register AllocSize,
+                                                    Align Alignment,
+                                                    LLT PtrTy) {
   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
-  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
 
@@ -5830,7 +5821,25 @@
     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
   }
 
-  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+  return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+  const auto &MF = *MI.getMF();
+  const auto &TFI = *MF.getSubtarget().getFrameLowering();
+  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+    return UnableToLegalize;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register AllocSize = MI.getOperand(1).getReg();
+  Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+  LLT PtrTy = MRI.getType(Dst);
+  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  Register SPTmp =
+      getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
+
   MIRBuilder.buildCopy(SPReg, SPTmp);
   MIRBuilder.buildCopy(Dst, SPTmp);
Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -47,6 +47,7 @@
                                   MachineIRBuilder &MIRBuilder,
                                   GISelChangeObserver &Observer) const;
   bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
+  bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.
Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -677,7 +677,8 @@
         return Query.Types[0] == p0 && Query.Types[1] == s64;
       });
 
-  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
+
+  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
 
   getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
 
@@ -719,11 +720,52 @@
     return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
   case TargetOpcode::G_TRUNC:
     return legalizeVectorTrunc(MI, Helper);
+  case TargetOpcode::G_DYN_STACKALLOC:
+    return legalizeDynStackAlloc(MI, Helper);
   }
 
   llvm_unreachable("expected switch to return");
 }
 
+bool AArch64LegalizerInfo::legalizeDynStackAlloc(
+    MachineInstr &MI, LegalizerHelper &Helper) const {
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+  // If stack probing is not enabled for this function, use the default
+  // lowering.
+  if (!MF.getFunction().hasFnAttribute("probe-stack") ||
+      MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
+          "inline-asm") {
+    Helper.lowerDynStackAlloc(MI);
+    return true;
+  }
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register AllocSize = MI.getOperand(1).getReg();
+  Align Alignment = assumeAligned(MI.getOperand(2).getImm());
+
+  assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
+         "Unexpected type for dynamic alloca");
+  assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
+         "Unexpected type for dynamic alloca");
+
+  LLT PtrTy = MRI.getType(Dst);
+  Register SPReg =
+      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
+  Register SPTmp =
+      Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
+  auto NewMI =
+      MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
+  MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
+  MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
+  MIRBuilder.buildCopy(Dst, SPTmp);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 static void extractParts(Register Reg, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
                          SmallVectorImpl<Register> &VRegs) {
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-dyn-alloca.mir
@@ -19,6 +19,21 @@
     ret i128* %addr
   }
 
+  define i8* @test_simple_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
+    %addr = alloca i8, i32 %numelts
+    ret i8* %addr
+  }
+
+  define i8* @test_aligned_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
+    %addr = alloca i8, i32 %numelts, align 32
+    ret i8* %addr
+  }
+
+  define i128* @test_natural_alloca_stack_probing(i32 %numelts) "probe-stack"="inline-asm" {
+    %addr = alloca i128, i32 %numelts
+    ret i128* %addr
+  }
+
 ...
 ---
 name: test_simple_alloca
@@ -160,3 +175,143 @@
     RET_ReallyLR implicit $x0
 
 ...
+---
+name: test_simple_alloca_stack_probing
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+stack:
+  - { id: 0, name: addr, type: variable-sized, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_simple_alloca_stack_probing
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
+    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
+    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
+    ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
+    ; CHECK: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY2]](p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(s32) = COPY $w0
+    %3:_(s64) = G_CONSTANT i64 1
+    %1:_(s64) = G_ZEXT %0(s32)
+    %2:_(s64) = G_MUL %1, %3
+    %4:_(s64) = G_CONSTANT i64 15
+    %5:_(s64) = nuw G_ADD %2, %4
+    %6:_(s64) = G_CONSTANT i64 -16
+    %7:_(s64) = G_AND %5, %6
+    %8:_(p0) = G_DYN_STACKALLOC %7(s64), 0
+    $x0 = COPY %8(p0)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: test_aligned_alloca_stack_probing
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 32
+stack:
+  - { id: 0, name: addr, type: variable-sized, alignment: 32 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_aligned_alloca_stack_probing
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
+    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
+    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -32
+    ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]]
+    ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[AND1]](s64)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
+    ; CHECK: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY2]](p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(s32) = COPY $w0
+    %3:_(s64) = G_CONSTANT i64 1
+    %1:_(s64) = G_ZEXT %0(s32)
+    %2:_(s64) = G_MUL %1, %3
+    %4:_(s64) = G_CONSTANT i64 15
+    %5:_(s64) = nuw G_ADD %2, %4
+    %6:_(s64) = G_CONSTANT i64 -16
+    %7:_(s64) = G_AND %5, %6
+    %8:_(p0) = G_DYN_STACKALLOC %7(s64), 32
+    $x0 = COPY %8(p0)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: test_natural_alloca_stack_probing
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment: 1
+stack:
+  - { id: 0, name: addr, type: variable-sized, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  bb.1 (%ir-block.0):
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_natural_alloca_stack_probing
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $sp
+    ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0)
+    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[PTRTOINT]], [[AND]]
+    ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr64common(p0) = G_INTTOPTR [[SUB]](s64)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[INTTOPTR]](p0)
+    ; CHECK: PROBED_STACKALLOC_DYN [[INTTOPTR]](p0), implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY2]](p0)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(s32) = COPY $w0
+    %3:_(s64) = G_CONSTANT i64 16
+    %1:_(s64) = G_ZEXT %0(s32)
+    %2:_(s64) = G_MUL %1, %3
+    %4:_(s64) = G_CONSTANT i64 15
+    %5:_(s64) = nuw G_ADD %2, %4
+    %6:_(s64) = G_CONSTANT i64 -16
+    %7:_(s64) = G_AND %5, %6
+    %8:_(p0) = G_DYN_STACKALLOC %7(s64), 0
+    $x0 = COPY %8(p0)
+    RET_ReallyLR implicit $x0
+
+...
Index: llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
+++ llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -global-isel | FileCheck %s
 
 ; Dynamically-sized allocation, needs a loop which can handle any size at
 ; runtime. The final iteration of the loop will temporarily put SP below the