diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -103,6 +103,10 @@
     }
   }
 
+  void
+  orderFrameObjects(const MachineFunction &MF,
+                    SmallVectorImpl<int> &ObjectsToAllocate) const override;
+
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -175,6 +175,10 @@
     cl::desc("merge settag instruction in function epilog"), cl::init(true),
     cl::Hidden);
 
+static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
+                                       cl::desc("sort stack allocations"),
+                                       cl::init(true), cl::Hidden);
+
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
 /// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -3100,3 +3104,169 @@
   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
                  getStackAlignment());
 }
+
+namespace {
+struct FrameObject {
+  bool IsValid = false;
+  // Index of the object in MFI.
+  int ObjectIndex = 0;
+  // Group ID this object belongs to.
+  int GroupIndex = -1;
+  // This object should be placed first (closest to SP).
+  bool ObjectFirst = false;
+  // This object's group (which always contains the object with
+  // ObjectFirst==true) should be placed first.
+  bool GroupFirst = false;
+};
+
+class GroupBuilder {
+  SmallVector<int, 8> CurrentMembers;
+  int NextGroupIndex = 0;
+  std::vector<FrameObject> &Objects;
+
+public:
+  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
+  void AddMember(int Index) { CurrentMembers.push_back(Index); }
+  void EndCurrentGroup() {
+    if (CurrentMembers.size() > 1) {
+      // Create a new group with the current member list. This might remove them
+      // from their pre-existing groups. That's OK, dealing with overlapping
+      // groups is too hard and unlikely to make a difference.
+      LLVM_DEBUG(dbgs() << "group:");
+      for (int Index : CurrentMembers) {
+        Objects[Index].GroupIndex = NextGroupIndex;
+        LLVM_DEBUG(dbgs() << " " << Index);
+      }
+      LLVM_DEBUG(dbgs() << "\n");
+      NextGroupIndex++;
+    }
+    CurrentMembers.clear();
+  }
+};
+
+bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
+  // For consistency in our comparison, all invalid objects are placed
+  // at the end. This also allows us to stop walking when we hit the
+  // first invalid item after it's all sorted.
+  if (!A.IsValid)
+    return false;
+  if (!B.IsValid)
+    return true;
+
+  // Objects at a lower index are closer to FP; objects at a higher index are
+  // closer to SP.
+
+  // The "first" object goes first (closest to SP).
+  if (A.ObjectFirst != B.ObjectFirst)
+    return B.ObjectFirst;
+
+  // The "first" group members follow.
+  if (A.GroupFirst != B.GroupFirst)
+    return B.GroupFirst;
+
+  // Order the rest by the group ID first to keep the groups together.
+  // Higher numbered groups are more likely to be around longer (i.e. untagged
+  // in the function epilogue and not at some earlier point). Place them closer
+  // to SP.
+  if (A.GroupIndex != B.GroupIndex)
+    return A.GroupIndex < B.GroupIndex;
+
+  // Otherwise, keep the objects in the original order.
+  return A.ObjectIndex < B.ObjectIndex;
+}
+} // namespace
+
+void AArch64FrameLowering::orderFrameObjects(
+    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+  if (!OrderFrameObjects || ObjectsToAllocate.empty())
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
+  for (auto &Obj : ObjectsToAllocate) {
+    FrameObjects[Obj].IsValid = true;
+    FrameObjects[Obj].ObjectIndex = Obj;
+  }
+
+  // Identify stack slots that are tagged at the same time.
+  GroupBuilder GB(FrameObjects);
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.isDebugInstr())
+        continue;
+      int OpIndex;
+      switch (MI.getOpcode()) {
+      case AArch64::STGloop:
+      case AArch64::STZGloop:
+        OpIndex = 3;
+        break;
+      case AArch64::STGOffset:
+      case AArch64::STZGOffset:
+      case AArch64::ST2GOffset:
+      case AArch64::STZ2GOffset:
+        OpIndex = 1;
+        break;
+      default:
+        OpIndex = -1;
+      }
+
+      int TaggedFI = -1;
+      if (OpIndex >= 0) {
+        const MachineOperand &MO = MI.getOperand(OpIndex);
+        if (MO.isFI()) {
+          int FI = MO.getIndex();
+          if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
+              FrameObjects[FI].IsValid)
+            TaggedFI = FI;
+        }
+      }
+
+      // If this is a stack tagging instruction for a slot that is not part of a
+      // group yet, either start a new group or add it to the current one.
+      if (TaggedFI >= 0)
+        GB.AddMember(TaggedFI);
+      else
+        GB.EndCurrentGroup();
+    }
+    // Groups should never span multiple basic blocks.
+    GB.EndCurrentGroup();
+  }
+
+  // If the function's tagged base pointer is pinned to a stack slot, we want to
+  // put that slot first when possible. This will likely place it at SP + 0,
+  // and save one instruction when generating the base pointer because IRG does
+  // not allow an immediate offset.
+  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
+  if (TBPI) {
+    FrameObjects[*TBPI].ObjectFirst = true;
+    FrameObjects[*TBPI].GroupFirst = true;
+    int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
+    if (FirstGroupIndex >= 0)
+      for (FrameObject &Object : FrameObjects)
+        if (Object.GroupIndex == FirstGroupIndex)
+          Object.GroupFirst = true;
+  }
+
+  llvm::stable_sort(FrameObjects, FrameObjectCompare);
+
+  int i = 0;
+  for (auto &Obj : FrameObjects) {
+    // All invalid items are sorted at the end, so it's safe to stop.
+    if (!Obj.IsValid)
+      break;
+    ObjectsToAllocate[i++] = Obj.ObjectIndex;
+  }
+
+  LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
+                                                    : FrameObjects) {
+    if (!Obj.IsValid)
+      break;
+    dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
+    if (Obj.ObjectFirst)
+      dbgs() << ", first";
+    if (Obj.GroupFirst)
+      dbgs() << ", group-first";
+    dbgs() << "\n";
+  });
+}
diff --git a/llvm/test/CodeGen/AArch64/settag-merge-order.ll b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=1 | FileCheck %s
+
+declare void @use(i8* %p)
+declare void @llvm.aarch64.settag(i8* %p, i64 %a)
+declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
+
+; Two loops of size 256; the second loop updates SP.
+; After frame reordering, two loops can be merged into one.
+define void @stg128_128_gap_128_128() {
+entry:
+; CHECK-LABEL: stg128_128_gap_128_128:
+; CHECK: mov x8, #512
+; CHECK: st2g sp, [sp], #32
+; CHECK: sub x8, x8, #32
+; CHECK: cbnz x8,
+; CHECK: ret
+  %a = alloca i8, i32 128, align 16
+  %a2 = alloca i8, i32 128, align 16
+  %b = alloca i8, i32 32, align 16
+  %c = alloca i8, i32 128, align 16
+  %c2 = alloca i8, i32 128, align 16
+  call void @use(i8* %b)
+  call void @llvm.aarch64.settag(i8* %a, i64 128)
+  call void @llvm.aarch64.settag(i8* %a2, i64 128)
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+  ret void
+}
+
+define void @stg2(i1 %flag) {
+entry:
+; CHECK-LABEL: stg2:
+  %a = alloca i8, i32 160, align 16
+  %a2 = alloca i8, i32 160, align 16
+  %b = alloca i8, i32 32, align 16
+  %c = alloca i8, i32 128, align 16
+  %c2 = alloca i8, i32 128, align 16
+  call void @use(i8* %b)
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+; CHECK: mov x8, #320
+; CHECK: st2g x9, [x9], #32
+; CHECK: sub x8, x8, #32
+; CHECK: cbnz x8,
+  call void @llvm.aarch64.settag(i8* %a, i64 160)
+  call void @llvm.aarch64.settag(i8* %a2, i64 160)
+  br label %if.end
+
+if.else:
+; CHECK: mov x8, #256
+; CHECK: st2g x9, [x9], #32
+; CHECK: sub x8, x8, #32
+; CHECK: cbnz x8,
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+  br label %if.end
+
+if.end:
+; CHECK: mov x8, #576
+; CHECK: st2g sp, [sp], #32
+; CHECK: sub x8, x8, #32
+; CHECK: cbnz x8,
+  call void @llvm.aarch64.settag(i8* %a, i64 160)
+  call void @llvm.aarch64.settag(i8* %a2, i64 160)
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+
+; CHECK: ret
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s
 
 declare void @use(i8* %p)
 declare void @llvm.aarch64.settag(i8* %p, i64 %a)