diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -232,6 +232,9 @@
   /// target default.
   CodeModel::Model getCodeModel() const { return CMModel; }
 
+  /// Returns the maximum code size possible under the code model.
+  uint64_t getMaxCodeSize() const;
+
   /// Set the code model.
   void setCodeModel(CodeModel::Model CM) { CMModel = CM; }
 
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -84,6 +85,7 @@
   MachineFunction *MF = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
+  const TargetMachine *TM = nullptr;
 
   bool relaxBranchInstructions();
   void scanFunction();
@@ -232,6 +234,11 @@
   MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB);
   MF->insert(++OrigMBB.getIterator(), NewBB);
 
+  // Place the new block in the same section as OrigMBB.
+  NewBB->setSectionID(OrigMBB.getSectionID());
+  NewBB->setIsEndSection(OrigMBB.isEndSection());
+  OrigMBB.setIsEndSection(false);
+
   // Insert an entry into BlockInfo to align it properly with the block
   // numbers.
   BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
@@ -241,8 +248,9 @@
 /// Split the basic block containing MI into two blocks, which are joined by
 /// an unconditional branch. Update data structures and renumber blocks to
 /// account for this change and returns the newly created block.
-MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
-                                                           MachineBasicBlock *DestBB) {
+MachineBasicBlock *
+BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
+                                        MachineBasicBlock *DestBB) {
   MachineBasicBlock *OrigBB = MI.getParent();
 
   // Create a new MBB for the code after the OrigBB.
@@ -250,6 +258,11 @@
       MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
   MF->insert(++OrigBB->getIterator(), NewBB);
 
+  // Place the new block in the same section as OrigBB.
+  NewBB->setSectionID(OrigBB->getSectionID());
+  NewBB->setIsEndSection(OrigBB->isEndSection());
+  OrigBB->setIsEndSection(false);
+
   // Splice the instructions starting with MI over to NewBB.
   NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end());
 
@@ -300,7 +313,12 @@
   int64_t BrOffset = getInstrOffset(MI);
   int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
 
-  if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
+  const MachineBasicBlock *SrcBB = MI.getParent();
+
+  if (TII->isBranchOffsetInRange(MI.getOpcode(),
+                                 SrcBB->getSectionID() != DestBB.getSectionID()
+                                     ? TM->getMaxCodeSize()
+                                     : DestOffset - BrOffset))
     return true;
 
   LLVM_DEBUG(dbgs() << "Out of range branch to destination "
@@ -462,7 +480,10 @@
   int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
   int64_t SrcOffset = getInstrOffset(MI);
 
-  assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+  assert(!TII->isBranchOffsetInRange(
+      MI.getOpcode(), MBB->getSectionID() != DestBB->getSectionID()
+                          ? TM->getMaxCodeSize()
+                          : DestOffset - SrcOffset));
 
   BlockInfo[MBB->getNumber()].Size -= OldBrSize;
 
@@ -492,9 +513,15 @@
   // be erased.
   MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(),
                                                      DestBB->getBasicBlock());
+  std::prev(RestoreBB->getIterator())
+      ->setIsEndSection(RestoreBB->isEndSection());
+  RestoreBB->setIsEndSection(false);
 
   TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
-                            DestOffset - SrcOffset, RS.get());
+                            BranchBB->getSectionID() != DestBB->getSectionID()
+                                ? TM->getMaxCodeSize()
+                                : DestOffset - SrcOffset,
+                            RS.get());
 
   BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
@@ -525,6 +552,11 @@
     BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
     // Update the offset starting from the previous block.
     adjustBlockOffsets(*PrevBB);
+
+    // Fix up section information for RestoreBB and DestBB.
+    RestoreBB->setSectionID(DestBB->getSectionID());
+    RestoreBB->setIsBeginSection(DestBB->isBeginSection());
+    DestBB->setIsBeginSection(false);
   } else {
     // Remove restore block if it's not required.
     MF->erase(RestoreBB);
@@ -553,7 +585,7 @@
       // Unconditional branch destination might be unanalyzable, assume these
       // are OK.
       if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) {
-        if (!isBlockInRange(*Last, *DestBB)) {
+        if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last)) {
           fixupUnconditionalBranch(*Last);
           ++NumUnconditionalRelaxed;
           Changed = true;
@@ -607,6 +639,7 @@
 
   const TargetSubtargetInfo &ST = MF->getSubtarget();
   TII = ST.getInstrInfo();
+  TM = &MF->getTarget();
 
   TRI = ST.getRegisterInfo();
   if (TRI->trackLivenessAfterRegAlloc(*MF))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -213,6 +213,11 @@
   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
 
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;
+
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -69,6 +70,10 @@
     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
 
+static cl::opt<unsigned>
+    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
+                      cl::desc("Restrict range of B instructions (DEBUG)"));
+
 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                           AArch64::CATCHRET),
@@ -190,7 +195,7 @@
   default:
     llvm_unreachable("unexpected opcode!");
   case AArch64::B:
-    return 64;
+    return BDisplacementBits;
   case AArch64::TBNZW:
   case AArch64::TBZW:
   case AArch64::TBNZX:
@@ -235,6 +240,68 @@
   }
 }
 
+void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                            MachineBasicBlock &NewDestBB,
+                                            MachineBasicBlock &RestoreBB,
+                                            const DebugLoc &DL,
+                                            int64_t BrOffset,
+                                            RegScavenger *RS) const {
+  assert(RS && "RegScavenger required for long branching");
"RegScavenger required for long branching"); + assert(MBB.empty() && + "new block should be inserted for expanding unconditional branch"); + assert(MBB.pred_size() == 1); + assert(RestoreBB.empty() && + "restore block should be inserted for restoring clobbered registers"); + + auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) { + // Offsets outside of the signed 33-bit range are not supported for ADRP + + // ADD. + if (!isInt<33>(BrOffset)) + report_fatal_error( + "Branch offsets outside of the signed 33-bit range not supported"); + + BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg) + .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE); + BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg) + .addReg(Reg) + .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC) + .addImm(0); + BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg); + }; + + RS->enterBasicBlockEnd(MBB); + Register Reg = RS->FindUnusedReg(&AArch64::GPR64RegClass); + + // If there's a free register, manually insert the indirect branch using it. + if (Reg != AArch64::NoRegister) { + buildIndirectBranch(Reg, NewDestBB); + RS->setRegUsed(Reg); + return; + } + + // Otherwise, spill and use X16. This briefly moves the stack pointer, making + // it incompatible with red zones. + AArch64FunctionInfo *AFI = MBB.getParent()->getInfo(); + if (!AFI || AFI->hasRedZone().value_or(true)) + report_fatal_error( + "Unable to insert indirect branch inside function that has red zone"); + + Reg = AArch64::X16; + BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(Reg) + .addReg(AArch64::SP) + .addImm(-16); + + buildIndirectBranch(Reg, RestoreBB); + + BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(Reg, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); +} + // Branch analysis. bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -78,6 +78,19 @@ /// and dynamic-no-pic. Reloc::Model TargetMachine::getRelocationModel() const { return RM; } +uint64_t TargetMachine::getMaxCodeSize() const { + switch (getCodeModel()) { + case CodeModel::Tiny: + return llvm::maxUIntN(10); + case CodeModel::Small: + case CodeModel::Kernel: + case CodeModel::Medium: + return llvm::maxUIntN(31); + case CodeModel::Large: + return llvm::maxUIntN(64); + } +} + /// Get the IR-specified TLS model for Var. 
 static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
   switch (GV->getThreadLocalMode()) {
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-b.ll b/llvm/test/CodeGen/AArch64/branch-relax-b.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/branch-relax-b.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu --verify-machineinstrs -aarch64-b-offset-bits=9 -aarch64-tbz-offset-bits=6 -aarch64-cbz-offset-bits=6 -aarch64-bcc-offset-bits=6 | FileCheck %s
+
+define void @relax_b_nospill(i1 zeroext %0) {
+; CHECK-LABEL: relax_b_nospill:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: tbnz w0,
+; CHECK-SAME: LBB0_1
+; CHECK-NEXT: // %bb.3: // %entry
+; CHECK-NEXT: adrp [[SCAVENGED_REGISTER:x[0-9]+]], .LBB0_2
+; CHECK-NEXT: add [[SCAVENGED_REGISTER]], [[SCAVENGED_REGISTER]], :lo12:.LBB0_2
+; CHECK-NEXT: br [[SCAVENGED_REGISTER]]
+; CHECK-NEXT: .LBB0_1: // %iftrue
+; CHECK-NEXT: //APP
+; CHECK-NEXT: .zero 2048
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %iffalse
+; CHECK-NEXT: //APP
+; CHECK-NEXT: .zero 8
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+entry:
+  br i1 %0, label %iftrue, label %iffalse
+
+iftrue:
+  call void asm sideeffect ".space 2048", ""()
+  ret void
+
+iffalse:
+  call void asm sideeffect ".space 8", ""()
+  ret void
+}
+
+define void @relax_b_spill() {
+; CHECK-LABEL: relax_b_spill: // @relax_b_spill
+; CHECK: // %bb.0: // %entry
+; CHECK-COUNT-5: // 16-byte Folded Spill
+; CHECK-NOT: // 16-byte Folded Spill
+; CHECK: //APP
+; CHECK-COUNT-29: mov {{x[0-9]+}},
+; CHECK-NOT: mov {{x[0-9]+}},
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: b.eq .LBB1_1
+; CHECK-NEXT: // %bb.4: // %entry
+; CHECK-NEXT: str [[SPILL_REGISTER:x[0-9]+]], [sp,
+; CHECK-SAME: -16]!
+; CHECK-NEXT: adrp [[SPILL_REGISTER]], .LBB1_5
+; CHECK-NEXT: add [[SPILL_REGISTER]], [[SPILL_REGISTER]], :lo12:.LBB1_5
+; CHECK-NEXT: br [[SPILL_REGISTER]]
+; CHECK-NEXT: .LBB1_1: // %iftrue
+; CHECK-NEXT: //APP
+; CHECK-NEXT: .zero 2048
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: b .LBB1_3
+; CHECK-NEXT: .LBB1_5: // %iffalse
+; CHECK-NEXT: ldr [[SPILL_REGISTER]], [sp],
+; CHECK-SAME: 16
+; CHECK-NEXT: // %bb.2: // %iffalse
+; CHECK-NEXT: //APP
+; CHECK-COUNT-29: // reg use {{x[0-9]+}}
+; CHECK-NOT: // reg use {{x[0-9]+}}
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: .LBB1_3: // %common.ret
+; CHECK-COUNT-5: // 16-byte Folded Reload
+; CHECK-NOT: // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+  %x0 = call i64 asm sideeffect "mov x0, 1", "={x0}"()
+  %x1 = call i64 asm sideeffect "mov x1, 1", "={x1}"()
+  %x2 = call i64 asm sideeffect "mov x2, 1", "={x2}"()
+  %x3 = call i64 asm sideeffect "mov x3, 1", "={x3}"()
+  %x4 = call i64 asm sideeffect "mov x4, 1", "={x4}"()
+  %x5 = call i64 asm sideeffect "mov x5, 1", "={x5}"()
+  %x6 = call i64 asm sideeffect "mov x6, 1", "={x6}"()
+  %x7 = call i64 asm sideeffect "mov x7, 1", "={x7}"()
+  %x8 = call i64 asm sideeffect "mov x8, 1", "={x8}"()
+  %x9 = call i64 asm sideeffect "mov x9, 1", "={x9}"()
+  %x10 = call i64 asm sideeffect "mov x10, 1", "={x10}"()
+  %x11 = call i64 asm sideeffect "mov x11, 1", "={x11}"()
+  %x12 = call i64 asm sideeffect "mov x12, 1", "={x12}"()
+  %x13 = call i64 asm sideeffect "mov x13, 1", "={x13}"()
+  %x14 = call i64 asm sideeffect "mov x14, 1", "={x14}"()
+  %x15 = call i64 asm sideeffect "mov x15, 1", "={x15}"()
+  %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"()
+  %x17 = call i64 asm sideeffect "mov x17, 1", "={x17}"()
+  %x18 = call i64 asm sideeffect "mov x18, 1", "={x18}"()
"={x18}"() + %x19 = call i64 asm sideeffect "mov x19, 1", "={x19}"() + %x20 = call i64 asm sideeffect "mov x20, 1", "={x20}"() + %x21 = call i64 asm sideeffect "mov x21, 1", "={x21}"() + %x22 = call i64 asm sideeffect "mov x22, 1", "={x22}"() + %x23 = call i64 asm sideeffect "mov x23, 1", "={x23}"() + %x24 = call i64 asm sideeffect "mov x24, 1", "={x24}"() + %x25 = call i64 asm sideeffect "mov x25, 1", "={x25}"() + %x26 = call i64 asm sideeffect "mov x26, 1", "={x26}"() + %x27 = call i64 asm sideeffect "mov x27, 1", "={x27}"() + %x28 = call i64 asm sideeffect "mov x28, 1", "={x28}"() + + %cmp = icmp eq i64 %x16, %x15 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 2048", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{x0}"(i64 %x0) + call void asm sideeffect "# reg use $0", "{x1}"(i64 %x1) + call void asm sideeffect "# reg use $0", "{x2}"(i64 %x2) + call void asm sideeffect "# reg use $0", "{x3}"(i64 %x3) + call void asm sideeffect "# reg use $0", "{x4}"(i64 %x4) + call void asm sideeffect "# reg use $0", "{x5}"(i64 %x5) + call void asm sideeffect "# reg use $0", "{x6}"(i64 %x6) + call void asm sideeffect "# reg use $0", "{x7}"(i64 %x7) + call void asm sideeffect "# reg use $0", "{x8}"(i64 %x8) + call void asm sideeffect "# reg use $0", "{x9}"(i64 %x9) + call void asm sideeffect "# reg use $0", "{x10}"(i64 %x10) + call void asm sideeffect "# reg use $0", "{x11}"(i64 %x11) + call void asm sideeffect "# reg use $0", "{x12}"(i64 %x12) + call void asm sideeffect "# reg use $0", "{x13}"(i64 %x13) + call void asm sideeffect "# reg use $0", "{x14}"(i64 %x14) + call void asm sideeffect "# reg use $0", "{x15}"(i64 %x15) + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + call void asm sideeffect "# reg use $0", "{x17}"(i64 %x17) + call void asm sideeffect "# reg use $0", "{x18}"(i64 %x18) + call void asm sideeffect "# reg use $0", "{x19}"(i64 %x19) + call void asm sideeffect "# reg use $0", "{x20}"(i64 %x20) + call void asm sideeffect "# reg use $0", "{x21}"(i64 %x21) + call void asm sideeffect "# reg use $0", "{x22}"(i64 %x22) + call void asm sideeffect "# reg use $0", "{x23}"(i64 %x23) + call void asm sideeffect "# reg use $0", "{x24}"(i64 %x24) + call void asm sideeffect "# reg use $0", "{x25}"(i64 %x25) + call void asm sideeffect "# reg use $0", "{x26}"(i64 %x26) + call void asm sideeffect "# reg use $0", "{x27}"(i64 %x27) + call void asm sideeffect "# reg use $0", "{x28}"(i64 %x28) + ret void +} + +declare i32 @bar() +declare i32 @baz() \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -0,0 +1,75 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s + +--- | + target triple = "aarch64-unknown-linux-gnu" + declare i32 @bar() + declare i32 @baz() + declare i32 @qux() + + ; Function Attrs: nounwind + define void @relax_tbz(i1 zeroext %0) #0 { + br i1 %0, label %false_block, label %true_block + + false_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %true_block, %false_block + %3 = tail call i32 @qux() + ret void + + true_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + attributes #0 = { nounwind } + +... 
+---
+name: relax_tbz
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+  ; CHECK-LABEL: name: relax_tbz
+  ; COM: Check that cross-section conditional branches are
+  ; COM: relaxed.
+  ; CHECK: bb.0 (%ir-block.1, bbsections 1):
+  ; CHECK-NEXT: successors: %bb.3(0x40000000)
+  ; CHECK: TBNZW
+  ; CHECK-SAME: %bb.3
+  ; CHECK: B %bb.2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3 (%ir-block.1, bbsections 1):
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: B %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1.false_block (bbsections 2):
+  ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2.true_block (bbsections 3):
+  ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+  bb.0 (%ir-block.1, bbsections 1):
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $w0, $lr
+
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+    TBZW killed renamable $w0, 0, %bb.2
+    B %bb.1
+
+  bb.1.false_block (bbsections 2):
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+  bb.2.true_block (bbsections 3):
+    BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+...