diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2072,6 +2072,13 @@
     return false;
   }
 
+  /// Return true if \p MBB can safely be split to the cold section. The
+  /// default is true; targets override this to veto blocks, e.g. AArch64
+  /// keeps jump table lookups and jump table targets out of the cold section.
+  virtual bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const {
+    return true;
+  }
+
   /// Produce the expression describing the \p MI loading a value into
   /// the physical register \p Reg. This hook should only be used with
   /// \p MIs belonging to VReg-less functions.
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
@@ -108,6 +109,13 @@
                         const MachineBlockFrequencyInfo *MBFI,
                         ProfileSummaryInfo *PSI) {
   std::optional Count = MBFI->getBlockProfileCount(&MBB);
+
+  // Temporary hack to cope with AArch64's jump table encoding: return false
+  // whenever the target reports that the block is unsafe to split.
+  const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+  if (!TII.isMBBSafeToSplitToCold(MBB))
+    return false;
+
   // For instrumentation profiles and sample profiles, we use different ways
   // to judge whether a block is cold and should be split.
   if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -325,6 +325,8 @@
   std::optional isAddImmediate(const MachineInstr &MI,
                                Register Reg) const override;
 
+  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;
+
   std::optional describeLoadedValue(const MachineInstr &MI,
                                     Register Reg) const override;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -8368,6 +8368,36 @@
   return std::nullopt;
 }
 
+bool AArch64InstrInfo::isMBBSafeToSplitToCold(
+    const MachineBasicBlock &MBB) const {
+  // Jump tables are label-relative instead of table-relative, so a jump table
+  // lookup and the blocks its table refers to must all stay in the same
+  // section or else relocation fixup handling fails.
+
+  // MBB must stay put if any jump table of its function lists it as a target.
+  // getJumpTableInfo() may return null, so guard before walking the tables.
+  auto ContainsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
+    return llvm::is_contained(JTE.MBBs, &MBB);
+  };
+  const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
+  if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), ContainsMBB))
+    return false;
+
+  // MBB must also stay put if it performs a jump table lookup itself.
+  auto IsJumpTableLookup = [](const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case TargetOpcode::G_BRJT:
+    case AArch64::JumpTableDest32:
+    case AArch64::JumpTableDest16:
+    case AArch64::JumpTableDest8:
+      return true;
+    default:
+      return false;
+    }
+  };
+  return !llvm::any_of(MBB, IsJumpTableLookup);
+}
+
 std::optional
 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
                                       Register Reg) const {
diff --git a/llvm/test/CodeGen/AArch64/machine-function-splitter.ll b/llvm/test/CodeGen/AArch64/machine-function-splitter.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-function-splitter.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -aarch64-redzone | FileCheck %s -check-prefix=MFS-REDZONE
+
+define i32 @nosplit_redzone(i1 zeroext %0, i32 %a, i32 %b) nounwind !prof !14 !section_prefix !15 {
+;; Check that cold blocks in functions with red zones aren't split.
+; MFS-REDZONE-LABEL: nosplit_redzone
+; MFS-REDZONE-NOT: nosplit_redzone.cold:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+
+  br i1 %0, label %2, label %3, !prof !16
+
+2:                                                ; preds = %1
+  store i32 %a, ptr %a.addr, align 4
+  store i32 %b, ptr %b.addr, align 4
+  br label %4
+
+3:                                                ; preds = %1
+  store i32 %a, ptr %b.addr, align 4
+  store i32 %b, ptr %a.addr, align 4
+  br label %4
+
+4:                                                ; preds = %3, %2
+  %tmp = load i32, ptr %a.addr, align 4
+  %tmp1 = load i32, ptr %b.addr, align 4
+  %add = add nsw i32 %tmp, %tmp1
+  store i32 %add, ptr %x, align 4
+  %tmp2 = load i32, ptr %x, align 4
+  ret i32 %tmp2
+}
+
+declare i32 @bar()
+declare i32 @baz()
+declare i32 @bam()
+declare i32 @qux()
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 5}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999900, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 9000}
+!15 = !{!"function_section_prefix", !"hot"}
+!16 = !{!"branch_weights", i32 7000, i32 0}