diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -595,6 +595,11 @@ llvm_unreachable("target did not implement"); } + /// \returns the distance to assume for a branch that crosses sections. + virtual uint64_t getCrossSectionBranchDistance() const { + llvm_unreachable("target did not implement"); + } + /// \returns The block that branch instruction \p MI jumps to. virtual MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const { llvm_unreachable("target did not implement"); diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -232,6 +232,11 @@ MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB); MF->insert(++OrigMBB.getIterator(), NewBB); + // Place the new block in the same section as OrigMBB. + NewBB->setSectionID(OrigMBB.getSectionID()); + NewBB->setIsEndSection(OrigMBB.isEndSection()); + OrigMBB.setIsEndSection(false); + // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); @@ -241,8 +246,9 @@ /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, - MachineBasicBlock *DestBB) { +MachineBasicBlock * +BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB) { MachineBasicBlock *OrigBB = MI.getParent(); // Create a new MBB for the code after the OrigBB. 
@@ -250,6 +256,11 @@ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MF->insert(++OrigBB->getIterator(), NewBB); + // Place the new block in the same section as OrigBB. + NewBB->setSectionID(OrigBB->getSectionID()); + NewBB->setIsEndSection(OrigBB->isEndSection()); + OrigBB->setIsEndSection(false); + // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end()); @@ -300,7 +311,12 @@ int64_t BrOffset = getInstrOffset(MI); int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset; - if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) + const MachineBasicBlock *SrcBB = MI.getParent(); + + if (TII->isBranchOffsetInRange(MI.getOpcode(), + SrcBB->getSectionID() != DestBB.getSectionID() + ? TII->getCrossSectionBranchDistance() + : DestOffset - BrOffset)) return true; LLVM_DEBUG(dbgs() << "Out of range branch to destination " @@ -492,9 +508,15 @@ // be erased. MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(), DestBB->getBasicBlock()); + std::prev(RestoreBB->getIterator()) + ->setIsEndSection(RestoreBB->isEndSection()); + RestoreBB->setIsEndSection(false); TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, - DestOffset - SrcOffset, RS.get()); + BranchBB->getSectionID() != DestBB->getSectionID() + ? TII->getCrossSectionBranchDistance() + : DestOffset - SrcOffset, + RS.get()); BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB); adjustBlockOffsets(*MBB); @@ -525,6 +547,11 @@ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB); // Update the offset starting from the previous block. adjustBlockOffsets(*PrevBB); + + // Fix up section information for RestoreBB and DestBB. + RestoreBB->setSectionID(DestBB->getSectionID()); + RestoreBB->setIsBeginSection(DestBB->isBeginSection()); + DestBB->setIsBeginSection(false); } else { // Remove restore block if it's not required. 
MF->erase(RestoreBB); @@ -553,7 +580,7 @@ // Unconditional branch destination might be unanalyzable, assume these // are OK. if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) { - if (!isBlockInRange(*Last, *DestBB)) { + if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last)) { fixupUnconditionalBranch(*Last); ++NumUnconditionalRelaxed; Changed = true; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -208,8 +208,15 @@ bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + uint64_t getCrossSectionBranchDistance() const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset, RegScavenger *RS) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -69,6 +70,10 @@ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)")); +static cl::opt<unsigned> + BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), + cl::desc("Restrict range of B instructions (DEBUG)")); + AArch64InstrInfo::AArch64InstrInfo(const 
AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), @@ -190,7 +195,7 @@ default: llvm_unreachable("unexpected opcode!"); case AArch64::B: - return 64; + return BDisplacementBits; case AArch64::TBNZW: case AArch64::TBZW: case AArch64::TBNZX: @@ -214,6 +219,14 @@ return isIntN(Bits, BrOffset / 4); } +uint64_t AArch64InstrInfo::getCrossSectionBranchDistance() const { + // The cross-section branch distance is whatever distance is too far for a B + // instruction but close enough for an ADRP instruction. + // B: 26 offset bits + // ADRP: 20 page number bits + 12 page address offset bits = 32 offset bits + return (1LL << 32) - 1; +} + MachineBasicBlock * AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -235,6 +248,91 @@ } } +/// Fill the empty block \p MBB with an ADRP + ADD + BR sequence that jumps to +/// \p NewDestBB through a scratch register. If no register can be scavenged, +/// X16 is spilled in \p MBB and reloaded in \p RestoreBB, which then becomes +/// the target of the sequence. +void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const { + assert(RS && "RegScavenger required for long branching"); + assert(MBB.empty() && + "new block should be inserted for expanding unconditional branch"); + assert(MBB.pred_size() == 1); + assert(RestoreBB.empty() && + "restore block should be inserted for restoring clobbered registers"); + + if (!isInt<33>(BrOffset)) + report_fatal_error( + "Branch offsets outside of the signed 33-bit range not supported"); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Get a scratch register. It's important to get a virtual register because + // the register scavenger doesn't work with empty blocks. 
+ Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto EndInstr = MBB.end(); + + unsigned char AdrpFlags = AArch64II::MO_PAGE; + MachineInstr &AdrpInstr = + *BuildMI(MBB, EndInstr, DL, get(AArch64::ADRP), ScratchReg) + .addSym(NewDestBB.getSymbol(), AdrpFlags); + + unsigned char AddFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC; + MachineInstr &AddInstr = + *BuildMI(MBB, EndInstr, DL, get(AArch64::ADDXri), ScratchReg) + .addReg(ScratchReg) + .addSym(NewDestBB.getSymbol(), AddFlags) + .addImm(0); + + BuildMI(MBB, EndInstr, DL, get(AArch64::BR)).addReg(ScratchReg); + + // Try to get a physical register and spill one if none are available + RS->enterBasicBlockEnd(MBB); + Register Scav = RS->scavengeRegisterBackwards( + AArch64::GPR64RegClass, &AdrpInstr, /*RestoreAfter=*/false, /*SPAdj=*/0, + /*AllowSpill=*/false); + if (Scav != AArch64::NoRegister) { + RS->setRegUsed(Scav); + } else { + // When there is no scavenged register, it needs to specify its own. + // Use X16 because it should be used extremely rarely. + Scav = AArch64::X16; + + // Spilling briefly moves SP below the current frame, which would clobber a + // red zone, so refuse to expand the branch in a function that has one. + const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>(); + if (!AFI || AFI->hasRedZone().value_or(true)) + report_fatal_error( + "Unable to insert indirect branch inside function that has red zone"); + + // Push the register to the stack + BuildMI(MBB, AdrpInstr, DL, get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(Scav) + .addReg(AArch64::SP) + .addImm(-16); + + // Set the operands appropriately + AdrpInstr.getOperand(1).ChangeToMCSymbol(RestoreBB.getSymbol(), AdrpFlags); + AddInstr.getOperand(2).ChangeToMCSymbol(RestoreBB.getSymbol(), AddFlags); + + // Restore the register from the stack + BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(Scav, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); + } + + MRI.replaceRegWith(ScratchReg, Scav); + MRI.clearVirtRegs(); +} + // Branch analysis. 
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/llvm/test/CodeGen/AArch64/branch-relax-b.ll b/llvm/test/CodeGen/AArch64/branch-relax-b.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/branch-relax-b.ll @@ -0,0 +1,140 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu --verify-machineinstrs -aarch64-b-offset-bits=9 -aarch64-tbz-offset-bits=6 -aarch64-cbz-offset-bits=6 -aarch64-bcc-offset-bits=6 | FileCheck %s + +define void @relax_b_nospill(i1 zeroext %0) { +; CHECK-LABEL: relax_b_nospill: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbnz w0, +; CHECK-SAME: LBB0_1 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: adrp [[SCAVENGED_REGISTER:x[0-9]+]], .LBB0_2 +; CHECK-NEXT: add [[SCAVENGED_REGISTER]], [[SCAVENGED_REGISTER]], :lo12:.LBB0_2 +; CHECK-NEXT: br [[SCAVENGED_REGISTER]] +; CHECK-NEXT: .LBB0_1: // %iftrue +; CHECK-NEXT: //APP +; CHECK-NEXT: .zero 2048 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %iffalse +; CHECK-NEXT: //APP +; CHECK-NEXT: .zero 8 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +entry: + br i1 %0, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 2048", ""() + ret void + +iffalse: + call void asm sideeffect ".space 8", ""() + ret void +} + +; The spilled register must be the one used for the branch and later reloaded. +define void @relax_b_spill() { +; CHECK-LABEL: relax_b_spill: // @relax_b_spill +; CHECK: // %bb.0: // %entry +; CHECK-COUNT-5: // 16-byte Folded Spill +; CHECK-NOT: // 16-byte Folded Spill +; CHECK: //APP +; CHECK-COUNT-29: mov {{x[0-9]+}}, +; CHECK-NOT: mov {{x[0-9]+}}, +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: b.eq .LBB1_1 +; CHECK-NEXT: // %bb.4: // %entry +; CHECK-NEXT: str [[SPILL_REGISTER:x[0-9]+]], [sp, +; CHECK-SAME: -16]! 
+; CHECK-NEXT: adrp [[SPILL_REGISTER]], .LBB1_5 +; CHECK-NEXT: add [[SPILL_REGISTER]], [[SPILL_REGISTER]], :lo12:.LBB1_5 +; CHECK-NEXT: br [[SPILL_REGISTER]] +; CHECK-NEXT: .LBB1_1: // %iftrue +; CHECK-NEXT: //APP +; CHECK-NEXT: .zero 2048 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: b .LBB1_3 +; CHECK-NEXT: .LBB1_5: // %iffalse +; CHECK-NEXT: ldr [[SPILL_REGISTER]], [sp], +; CHECK-SAME: 16 +; CHECK-NEXT: // %bb.2: // %iffalse +; CHECK-NEXT: //APP +; CHECK-COUNT-29: // reg use {{x[0-9]+}} +; CHECK-NOT: // reg use {{x[0-9]+}} +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: .LBB1_3: // %common.ret +; CHECK-COUNT-5: // 16-byte Folded Reload +; CHECK-NOT: // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %x0 = call i64 asm sideeffect "mov x0, 1", "={x0}"() + %x1 = call i64 asm sideeffect "mov x1, 1", "={x1}"() + %x2 = call i64 asm sideeffect "mov x2, 1", "={x2}"() + %x3 = call i64 asm sideeffect "mov x3, 1", "={x3}"() + %x4 = call i64 asm sideeffect "mov x4, 1", "={x4}"() + %x5 = call i64 asm sideeffect "mov x5, 1", "={x5}"() + %x6 = call i64 asm sideeffect "mov x6, 1", "={x6}"() + %x7 = call i64 asm sideeffect "mov x7, 1", "={x7}"() + %x8 = call i64 asm sideeffect "mov x8, 1", "={x8}"() + %x9 = call i64 asm sideeffect "mov x9, 1", "={x9}"() + %x10 = call i64 asm sideeffect "mov x10, 1", "={x10}"() + %x11 = call i64 asm sideeffect "mov x11, 1", "={x11}"() + %x12 = call i64 asm sideeffect "mov x12, 1", "={x12}"() + %x13 = call i64 asm sideeffect "mov x13, 1", "={x13}"() + %x14 = call i64 asm sideeffect "mov x14, 1", "={x14}"() + %x15 = call i64 asm sideeffect "mov x15, 1", "={x15}"() + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + %x17 = call i64 asm sideeffect "mov x17, 1", "={x17}"() + %x18 = call i64 asm sideeffect "mov x18, 1", "={x18}"() + %x19 = call i64 asm sideeffect "mov x19, 1", "={x19}"() + %x20 = call i64 asm sideeffect "mov x20, 1", "={x20}"() + %x21 = call i64 asm sideeffect "mov x21, 1", "={x21}"() + %x22 = call 
i64 asm sideeffect "mov x22, 1", "={x22}"() + %x23 = call i64 asm sideeffect "mov x23, 1", "={x23}"() + %x24 = call i64 asm sideeffect "mov x24, 1", "={x24}"() + %x25 = call i64 asm sideeffect "mov x25, 1", "={x25}"() + %x26 = call i64 asm sideeffect "mov x26, 1", "={x26}"() + %x27 = call i64 asm sideeffect "mov x27, 1", "={x27}"() + %x28 = call i64 asm sideeffect "mov x28, 1", "={x28}"() + + %cmp = icmp eq i64 %x16, %x15 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 2048", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{x0}"(i64 %x0) + call void asm sideeffect "# reg use $0", "{x1}"(i64 %x1) + call void asm sideeffect "# reg use $0", "{x2}"(i64 %x2) + call void asm sideeffect "# reg use $0", "{x3}"(i64 %x3) + call void asm sideeffect "# reg use $0", "{x4}"(i64 %x4) + call void asm sideeffect "# reg use $0", "{x5}"(i64 %x5) + call void asm sideeffect "# reg use $0", "{x6}"(i64 %x6) + call void asm sideeffect "# reg use $0", "{x7}"(i64 %x7) + call void asm sideeffect "# reg use $0", "{x8}"(i64 %x8) + call void asm sideeffect "# reg use $0", "{x9}"(i64 %x9) + call void asm sideeffect "# reg use $0", "{x10}"(i64 %x10) + call void asm sideeffect "# reg use $0", "{x11}"(i64 %x11) + call void asm sideeffect "# reg use $0", "{x12}"(i64 %x12) + call void asm sideeffect "# reg use $0", "{x13}"(i64 %x13) + call void asm sideeffect "# reg use $0", "{x14}"(i64 %x14) + call void asm sideeffect "# reg use $0", "{x15}"(i64 %x15) + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + call void asm sideeffect "# reg use $0", "{x17}"(i64 %x17) + call void asm sideeffect "# reg use $0", "{x18}"(i64 %x18) + call void asm sideeffect "# reg use $0", "{x19}"(i64 %x19) + call void asm sideeffect "# reg use $0", "{x20}"(i64 %x20) + call void asm sideeffect "# reg use $0", "{x21}"(i64 %x21) + call void asm sideeffect "# reg use $0", "{x22}"(i64 %x22) + call void asm sideeffect "# reg use $0", "{x23}"(i64 %x23) + 
call void asm sideeffect "# reg use $0", "{x24}"(i64 %x24) + call void asm sideeffect "# reg use $0", "{x25}"(i64 %x25) + call void asm sideeffect "# reg use $0", "{x26}"(i64 %x26) + call void asm sideeffect "# reg use $0", "{x27}"(i64 %x27) + call void asm sideeffect "# reg use $0", "{x28}"(i64 %x28) + ret void +} + +declare i32 @bar() +declare i32 @baz() diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -0,0 +1,119 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-cbz-offset-bits=8 -aarch64-tbz-offset-bits=8 %s -o - | FileCheck %s + +--- | + target triple = "aarch64-unknown-linux-gnu" + declare i32 @bar() + declare i32 @baz() + declare i32 @qux() + + ; Function Attrs: nounwind + define void @foo1(i1 zeroext %0) #0 { + br i1 %0, label %false_block, label %true_block + + false_block: ; preds = %1 + %2 = call i32 @baz() + br label %end + + end: ; preds = %true_block, %false_block + %3 = tail call i32 @qux() + ret void + + true_block: ; preds = %1 + %4 = call i32 @bar() + br label %end + } + + attributes #0 = { nounwind } + +... 
+--- +name: foo1 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: true + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: foo1 + ; COM: TODO: remove unnecessary function traits + ; COM: Check that a cross-section conditional branch is relaxed: TBZW %bb.2 + ; COM: becomes TBNZW %bb.3 + B %bb.2, and the old B %bb.1 moves into bb.3. 
+ ; CHECK: bb.0 (%ir-block.1, bbsections 1): + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: TBNZW + ; CHECK-SAME: %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3 (%ir-block.1, bbsections 1): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.false_block (bbsections 2): + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.true_block (bbsections 3): + ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + bb.0 (%ir-block.1, bbsections 1): + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $w0, $lr + + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + TBZW killed renamable $w0, 0, %bb.2 + B %bb.1 + + bb.1.false_block (bbsections 2): + BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp + + bb.2.true_block (bbsections 3): + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp +...