diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -79,6 +79,10 @@ }; SmallVector BlockInfo; + + // The basic block after which trampolines are inserted. This is the last + // basic block that isn't in the cold section. + MachineBasicBlock *TrampolineInsertionPoint; std::unique_ptr RS; LivePhysRegs LiveRegs; @@ -166,16 +170,27 @@ void BranchRelaxation::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); + TrampolineInsertionPoint = nullptr; // First thing, compute the size of all basic blocks, and see if the function // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineBasicBlock &MBB : *MF) + // instructions in the inline assembly. At the same time, place the + // trampoline insertion point at the end of the hot portion of the function. + for (MachineBasicBlock &MBB : *MF) { BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB); + if (MBB.getSectionID() != MBBSectionID::ColdSectionID) + TrampolineInsertionPoint = &MBB; + } + // Compute block offsets and known bits. adjustBlockOffsets(*MF->begin()); + + if (TrampolineInsertionPoint == nullptr) { + LLVM_DEBUG(dbgs() << " No suitable trampoline insertion point found in " + << MF->getName() << ".\n"); + } } /// computeBlockSize - Compute the size for MBB. @@ -376,6 +391,51 @@ assert(!Fail && "branches to be relaxed must be analyzable"); (void)Fail; + // For cross-section branches whose destination is in the cold section, add a + // "trampoline branch", which unconditionally branches to the branch + // destination. Place the trampoline branch after the last hot block and + // retarget the conditional branch to the trampoline. + // tbz L1 + // => + // tbz L1Trampoline + // ... 
+ // L1Trampoline: b L1 + if (MBB->getSectionID() != TBB->getSectionID() && + TBB->getSectionID() == MBBSectionID::ColdSectionID && + TrampolineInsertionPoint != nullptr) { + // If the insertion point is out of range, we can't put a trampoline there. + NewBB = + createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock()); + + if (isBlockInRange(MI, *NewBB)) { + LLVM_DEBUG(dbgs() << " Retarget destination to trampoline at " + << printMBBReference(*NewBB) << ".\n"); + + insertUncondBranch(NewBB, TBB); + + // Update the successor lists to include the trampoline. + MBB->replaceSuccessor(TBB, NewBB); + NewBB->addSuccessor(TBB); + + // Replace branch in the current (MBB) block. + removeBranch(MBB); + insertBranch(MBB, NewBB, FBB, Cond); + + TrampolineInsertionPoint = NewBB; + finalizeBlockChanges(MBB, NewBB); + return true; + } + + LLVM_DEBUG( + dbgs() << " Trampoline insertion point out of range for Bcc from " + << printMBBReference(*MBB) << " to " << printMBBReference(*TBB) + << ".\n"); + TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection()); + MF->erase(NewBB); + // NewBB was just erased; clear it so the fallback path below never sees it. + NewBB = nullptr; + } + // Add an unconditional branch to the destination and invert the branch // condition to jump over it: // tbz L1 @@ -665,6 +725,7 @@ LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); BlockInfo.clear(); + TrampolineInsertionPoint = nullptr; return MadeChange; } diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation 
-aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s --- | declare i32 @bar() @@ -21,6 +22,73 @@ br label %end } + define void @tbz_hot_to_cold(i1 zeroext %0) { + br i1 %0, label %hot_block, label %cold_block + + hot_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %cold_block, %hot_block + %3 = tail call i32 @qux() + ret void + + cold_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_no_valid_tramp(i1 zeroext %0) { + br i1 %0, label %hot, label %cold + + hot: ; preds = %1 + %2 = call i32 @baz() + call void asm sideeffect ".space 1024", ""() + br label %end + + end: ; preds = %cold, %hot + %3 = tail call i32 @qux() + ret void + + cold: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_cold_to_hot(i1 zeroext %0) #0 { + br i1 %0, label %cold_block, label %hot_block + + cold_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %hot_block, %cold_block + %3 = tail call i32 @qux() + ret void + + hot_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + define void @tbz_tramp_pushed_oob(i1 zeroext %0, i1 zeroext %1) { + entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + br i1 %0, label %unrelaxable, label %cold + + unrelaxable: ; preds = %entry + br i1 %1, label %end, label %cold + + end: ; preds = %unrelaxable + call void asm sideeffect ".space 996", ""() + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + + cold: ; preds = %entry, %unrelaxable + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + } + ... --- name: relax_tbz @@ -69,3 +137,201 @@ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp ... 
+--- +name: tbz_hot_to_cold +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + ; CHECK-LABEL: name: tbz_hot_to_cold + ; COM: Check that branch relaxation relaxes cross-section conditional + ; COM: branches by creating trampolines after all other hot basic blocks. + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: TBZW + ; CHECK-SAME: %bb.3 + ; CHECK: bb.1.hot_block: + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cold_block (bbsections Cold): + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + bb.0 (%ir-block.1): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + TBZW killed renamable $w0, 0, %bb.2 + + bb.1.hot_block: + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.cold_block (bbsections Cold): + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... 
+--- +name: tbz_no_valid_tramp +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: tbz_no_valid_tramp + ; COM: Check that branch relaxation doesn't insert a trampoline if there is no + ; COM: viable insertion location. + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: CBNZW + ; CHECK-SAME: %bb.1 + ; CHECK-NEXT: B + ; CHECK-SAME: %bb.3 + ; CHECK: bb.1.hot: + ; CHECK: TCRETURNdi + ; CHECK: bb.2.cold (bbsections Cold): + ; CHECK: TCRETURNdi + bb.0 (%ir-block.1): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + CBZW killed renamable $w0, %bb.2 + + bb.1.hot: + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + INLINEASM &".space 1024", 1 /* sideeffect attdialect */ + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.cold (bbsections Cold): + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... 
+--- +name: tbz_cold_to_hot +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: tbz_cold_to_hot + ; COM: Check that relaxation of conditional branches from the Cold section to + ; COM: the Hot section doesn't modify the Hot section. + ; CHECK: bb.0 (%ir-block.1, bbsections Cold): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: CBNZW + ; CHECK-SAME: %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK: bb.1.cold_block (bbsections Cold): + ; CHECK: TCRETURNdi + ; CHECK: bb.2.hot_block: + ; CHECK: TCRETURNdi + bb.0 (%ir-block.1, bbsections Cold): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + CBZW killed renamable $w0, %bb.2 + + bb.1.cold_block (bbsections Cold): + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.hot_block: + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +... 
+--- +name: tbz_tramp_pushed_oob +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } + - { reg: '$w1', virtual-reg: '' } +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; INDIRECT-LABEL: name: tbz_tramp_pushed_oob + ; COM: Check that a conditional branch to a trampoline is properly relaxed + ; COM: if the trampoline is pushed out of range. + ; INDIRECT: bb.0.entry: + ; INDIRECT-NEXT: successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000) + ; INDIRECT: TBNZW + ; INDIRECT-SAME: %bb.1 + ; INDIRECT-NEXT: B{{ }} + ; INDIRECT-SAME: %[[TRAMP1]] + ; INDIRECT: bb.1.unrelaxable: + ; INDIRECT-NEXT: successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000) + ; INDIRECT: TBNZW + ; INDIRECT-SAME: %bb.2 + ; INDIRECT: [[TRAMP2]] + ; INDIRECT-NEXT: successors: %bb.3(0x80000000) + ; INDIRECT: bb.2.end: + ; INDIRECT: TCRETURNdi + ; INDIRECT: [[TRAMP1]].entry: + ; INDIRECT: successors: %bb.3(0x80000000) + ; INDIRECT-NOT: bbsections Cold + ; INDIRECT: bb.3.cold (bbsections Cold): + ; INDIRECT: TCRETURNdi + + bb.0.entry (%ir-block.entry): + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $w0, $w1, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + TBZW killed renamable $w0, 0, %bb.3 + + bb.1.unrelaxable: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $w1, $x16 + + TBNZW killed renamable $w1, 0, %bb.2 + + B %bb.3 + + bb.2.end: + liveins: $x16 + + INLINEASM &".space 996", 1 /* sideeffect attdialect */ + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + early-clobber $sp, $lr = frame-destroy LDRXpost 
$sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.3.cold (bbsections Cold): + liveins: $x16 + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + +...