Index: llvm/trunk/lib/Target/Hexagon/Hexagon.td
===================================================================
--- llvm/trunk/lib/Target/Hexagon/Hexagon.td
+++ llvm/trunk/lib/Target/Hexagon/Hexagon.td
@@ -60,6 +60,9 @@
       "Enable generation of duplex instruction">;
 def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
       "true", "Reserve register R19">;
+def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
+      "NoreturnStackElim", "true",
+      "Eliminate stack allocation in a noreturn function when possible">;
 
 //===----------------------------------------------------------------------===//
 // Hexagon Instruction Predicate Definitions.
Index: llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h
+++ llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -41,6 +41,8 @@
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
       override {}
 
+  bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
+
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
       MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
       const TargetRegisterInfo *TRI) const override {
Index: llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ llvm/trunk/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -550,6 +550,37 @@
   }
 }
 
+/// Returns true if the target can safely skip saving callee-saved registers
+/// for noreturn nounwind functions.
+bool HexagonFrameLowering::enableCalleeSaveSkip(
+    const MachineFunction &MF) const {
+  const auto &F = MF.getFunction();
+  assert(F.hasFnAttribute(Attribute::NoReturn) &&
+         F.hasFnAttribute(Attribute::NoUnwind) &&
+         !F.hasFnAttribute(Attribute::UWTable));
+  (void)F; // Only the assert reads F; silence -Wunused-variable in NDEBUG.
+
+  // No need to save callee saved registers if the function does not return.
+  return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
+}
+
+// Helper function used to determine when to eliminate the stack frame for
+// functions marked as noreturn and when the noreturn-stack-elim option is
+// specified. When both these conditions are true, then a FP may not be needed
+// if the function makes a call. It is very similar to enableCalleeSaveSkip,
+// but it is used to check if the allocframe can be eliminated as well.
+static bool enableAllocFrameElim(const MachineFunction &MF) {
+  const auto &F = MF.getFunction();
+  const auto &MFI = MF.getFrameInfo();
+  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  assert(!MFI.hasVarSizedObjects() &&
+         !HST.getRegisterInfo()->needsStackRealignment(MF));
+  return F.hasFnAttribute(Attribute::NoReturn) &&
+         F.hasFnAttribute(Attribute::NoUnwind) &&
+         !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
+         MFI.getStackSize() == 0;
+}
+
 void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
       bool PrologueStubs) const {
   MachineFunction &MF = *MBB.getParent();
@@ -994,7 +1025,7 @@
   }
 
   const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
-  if (MFI.hasCalls() || HMFI.hasClobberLR())
+  if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
     return true;
 
   return false;
Index: llvm/trunk/lib/Target/Hexagon/HexagonSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonSubtarget.h
+++ llvm/trunk/lib/Target/Hexagon/HexagonSubtarget.h
@@ -56,6 +56,7 @@
   bool HasMemNoShuf = false;
   bool EnableDuplex = false;
   bool ReservedR19 = false;
+  bool NoreturnStackElim = false;
 
 public:
   Hexagon::ArchEnum HexagonArchVersion;
@@ -168,6 +169,8 @@
   bool hasReservedR19() const { return ReservedR19; }
   bool usePredicatedCalls() const;
 
+  bool noreturnStackElim() const { return NoreturnStackElim; }
+
   bool useBSBScheduling() const { return UseBSBScheduling; }
   bool enableMachineScheduler() const override;
 
Index: llvm/trunk/test/CodeGen/Hexagon/noreturn-stack-elim.ll
===================================================================
--- llvm/trunk/test/CodeGen/Hexagon/noreturn-stack-elim.ll
+++ llvm/trunk/test/CodeGen/Hexagon/noreturn-stack-elim.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=hexagon-unknown--elf -hexagon-initial-cfg-cleanup=false < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown--elf -hexagon-initial-cfg-cleanup=false -mattr=+noreturn-stack-elim < %s | FileCheck %s --check-prefix=CHECK-FLAG
+
+; Test the noreturn stack elimination feature. We've added a new flag/feature
+; that attempts to eliminate the local stack for noreturn nounwind functions.
+; The optimization eliminates the need to save callee saved registers, and
+; eliminates the allocframe, when no local stack space is needed.
+
+%struct.A = type { i32, i32 }
+
+; Test the case when noreturn-stack-elim determines that the callee-saved
+; registers do not need to be saved, and the allocframe can be eliminated.
+
+; CHECK-LABEL: test1
+; CHECK: memd(r29+#-16) = r17:16
+; CHECK: allocframe
+
+; CHECK-FLAG-LABEL: test1
+; CHECK-FLAG-NOT: memd(r29+#-16) = r17:16
+; CHECK-FLAG-NOT: allocframe
+
+define dso_local void @test1(i32 %a, %struct.A* %b) local_unnamed_addr #0 {
+entry:
+  %n = getelementptr inbounds %struct.A, %struct.A* %b, i32 0, i32 0
+  store i32 %a, i32* %n, align 4
+  tail call void @f1() #3
+  tail call void @nrf1(%struct.A* %b) #4
+  unreachable
+}
+
+; Test that noreturn-stack-elim doesn't eliminate the local stack, when
+; a function needs to allocate a local variable.
+
+; CHECK-LABEL: test2
+; CHECK: allocframe
+
+; CHECK-FLAG-LABEL: test2
+; CHECK-FLAG: allocframe
+
+define dso_local void @test2() local_unnamed_addr #0 {
+entry:
+  %a = alloca i32, align 4
+  %0 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
+  call void @f3(i32* nonnull %a) #4
+  unreachable
+}
+
+; Test that noreturn-stack-elim can eliminate the allocframe when no locals
+; are allocated on the stack.
+
+; CHECK-LABEL: test3
+; CHECK: allocframe
+
+; CHECK-FLAG-LABEL: test3
+; CHECK-FLAG-NOT: allocframe
+
+define dso_local void @test3(i32 %a) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %a, 5
+  call void @f2(i32 %add)
+  unreachable
+}
+
+; Test that nothing is optimized when an alloca is needed for local stack.
+
+; CHECK-LABEL: test4
+; CHECK: allocframe
+
+; CHECK-FLAG-LABEL: test4
+; CHECK-FLAG: allocframe
+
+define dso_local void @test4(i32 %n) local_unnamed_addr #0 {
+entry:
+  %vla = alloca i32, i32 %n, align 8
+  call void @f3(i32* nonnull %vla) #4
+  unreachable
+}
+
+
+declare dso_local void @f1() local_unnamed_addr
+declare dso_local void @f2(i32) local_unnamed_addr
+declare dso_local void @f3(i32*) local_unnamed_addr
+
+declare dso_local void @nrf1(%struct.A*) local_unnamed_addr #2
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #5
+
+attributes #0 = { noreturn nounwind }
+attributes #2 = { noreturn }
+attributes #3 = { nounwind }
+attributes #4 = { noreturn nounwind }
+attributes #5 = { argmemonly nounwind }
+