diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -79,6 +79,9 @@ }; SmallVector BlockInfo; + MachineBasicBlock *TrampolineInsertionPoint; + SmallDenseSet> + RelaxedUnconditionals; std::unique_ptr RS; LivePhysRegs LiveRegs; @@ -144,7 +147,8 @@ if (MI.getOpcode() == TargetOpcode::FAULTING_OP) continue; MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); - assert(isBlockInRange(MI, *DestBB)); + assert(isBlockInRange(MI, *DestBB) || + RelaxedUnconditionals.contains({&MBB, DestBB})); } } #endif @@ -167,6 +171,9 @@ BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); + TrampolineInsertionPoint = nullptr; + RelaxedUnconditionals.clear(); + // First thing, compute the size of all basic blocks, and see if the function // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any @@ -176,6 +183,15 @@ // Compute block offsets and known bits. adjustBlockOffsets(*MF->begin()); + + // Place the trampoline insertion point at the end of the hot portion of the + // function. + for (MachineBasicBlock &MBB : reverse(*MF)) { + if (MBB.getSectionID() != MBBSectionID::ColdSectionID) { + TrampolineInsertionPoint = &MBB; + break; + } + } } /// computeBlockSize - Compute the size for MBB. @@ -503,6 +519,8 @@ BranchBB->sortUniqueLiveIns(); BranchBB->addSuccessor(DestBB); MBB->replaceSuccessor(DestBB, BranchBB); + if (TrampolineInsertionPoint == MBB) + TrampolineInsertionPoint = BranchBB; } DebugLoc DL = MI.getDebugLoc(); @@ -528,6 +546,24 @@ // If RestoreBB is required, try to place just before DestBB. if (!RestoreBB->empty()) { + if (MBB->getSectionID() == MBBSectionID::ColdSectionID && + DestBB->getSectionID() != MBBSectionID::ColdSectionID) { + // If the jump is Cold -> Hot, then don't place the restore block in + // the middle of the function. 
Place it at the end. + MachineBasicBlock *NewBB = createNewBlockAfter(*TrampolineInsertionPoint); + TII->insertUnconditionalBranch(*NewBB, DestBB, DebugLoc()); + BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB); + + // New trampolines should be inserted after NewBB + TrampolineInsertionPoint = NewBB; + + // Retarget the unconditional branch to the trampoline block + BranchBB->replaceSuccessor(DestBB, NewBB); + NewBB->addSuccessor(DestBB); + + DestBB = NewBB; + } + // TODO: For multiple far branches to the same destination, there are // chances that some restore blocks could be shared if they clobber the // same registers and share the same restore sequence. So far, those @@ -557,9 +593,11 @@ RestoreBB->setSectionID(DestBB->getSectionID()); RestoreBB->setIsBeginSection(DestBB->isBeginSection()); DestBB->setIsBeginSection(false); + RelaxedUnconditionals.insert({BranchBB, RestoreBB}); } else { // Remove restore block if it's not required. MF->erase(RestoreBB); + RelaxedUnconditionals.insert({BranchBB, DestBB}); } return true; @@ -585,7 +623,8 @@ // Unconditional branch destination might be unanalyzable, assume these // are OK. 
if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) { - if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last)) { + if (!isBlockInRange(*Last, *DestBB) && !TII->isTailCall(*Last) && + !RelaxedUnconditionals.contains({&MBB, DestBB})) { fixupUnconditionalBranch(*Last); ++NumUnconditionalRelaxed; Changed = true; @@ -665,6 +704,8 @@ LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); BlockInfo.clear(); + TrampolineInsertionPoint = nullptr; + RelaxedUnconditionals.clear(); return MadeChange; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -270,10 +270,20 @@ }; RS->enterBasicBlockEnd(MBB); - Register Reg = RS->FindUnusedReg(&AArch64::GPR64RegClass); + // If X16 is unused, then we can rely on the linker to safely insert the + // indirect branch. + Register Reg = AArch64::X16; + if (!RS->isRegUsed(Reg)) { + insertUnconditionalBranch(MBB, &NewDestBB, DL); + RS->setRegUsed(Reg); + return; + } - // If there's a free register, manually insert the indirect branch using it. - if (Reg != AArch64::NoRegister) { + // If there's a free register and it's worth inflating the code size, + // manually insert the indirect branch. + Reg = RS->FindUnusedReg(&AArch64::GPR64RegClass); + if (Reg != AArch64::NoRegister && + MBB.getSectionID() == MBBSectionID::ColdSectionID) { buildIndirectBranch(Reg, NewDestBB); RS->setRegUsed(Reg); return; @@ -285,7 +295,7 @@ if (!AFI || AFI->hasRedZone().value_or(true)) report_fatal_error( "Unable to insert indirect branch inside function that has red zone"); - + // Spill X16 so that the linker can safely insert the indirect branch. 
Reg = AArch64::X16; BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre)) .addReg(AArch64::SP, RegState::Define) @@ -293,7 +303,7 @@ .addReg(AArch64::SP) .addImm(-16); - buildIndirectBranch(Reg, RestoreBB); + BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB); BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost)) .addReg(AArch64::SP, RegState::Define) diff --git a/llvm/test/CodeGen/AArch64/branch-relax-b.ll b/llvm/test/CodeGen/AArch64/branch-relax-b.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-b.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-b.ll @@ -6,9 +6,7 @@ ; CHECK-NEXT: tbnz w0, ; CHECK-SAME: LBB0_1 ; CHECK-NEXT: // %bb.3: // %entry -; CHECK-NEXT: adrp [[SCAVENGED_REGISTER:x[0-9]+]], .LBB0_2 -; CHECK-NEXT: add [[SCAVENGED_REGISTER]], [[SCAVENGED_REGISTER]], :lo12:.LBB0_2 -; CHECK-NEXT: br [[SCAVENGED_REGISTER]] +; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 @@ -44,9 +42,7 @@ ; CHECK-NEXT: // %bb.4: // %entry ; CHECK-NEXT: str [[SPILL_REGISTER:x[0-9]+]], [sp, ; CHECK-SAME: -16]! -; CHECK-NEXT: adrp [[SPILL_REGISTER:x[0-9]+]], .LBB1_5 -; CHECK-NEXT: add [[SPILL_REGISTER:x[0-9]+]], [[SPILL_REGISTER:x[0-9]+]], :lo12:.LBB1_5 -; CHECK-NEXT: br [[SPILL_REGISTER:x[0-9]+]] +; CHECK-NEXT: b .LBB1_5 ; CHECK-NEXT: .LBB1_1: // %iftrue ; CHECK-NEXT: //APP ; CHECK-NEXT: .zero 2048 @@ -135,5 +131,44 @@ ret void } +define void @relax_b_x16_taken() { +; CHECK-LABEL: relax_b_x16_taken: // @relax_b_x16_taken +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: mov x16, #1 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cbnz x16, .LBB2_1 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: str x16, [sp, +; CHECK-SAME: -16]! 
+; CHECK-NEXT: b .LBB2_4 +; CHECK-NEXT: .LBB2_1: // %iftrue +; CHECK-NEXT: //APP +; CHECK-NEXT: .zero 2048 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_4: // %iffalse +; CHECK-NEXT: ldr x16, [sp], +; CHECK-SAME: 16 +; CHECK-NEXT: // %bb.2: // %iffalse +; CHECK-NEXT: //APP +; CHECK-NEXT: // reg use x16 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ret +entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + + %cmp = icmp eq i64 %x16, 0 + br i1 %cmp, label %iffalse, label %iftrue + +iftrue: + call void asm sideeffect ".space 2048", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void +} + declare i32 @bar() declare i32 @baz() \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir --- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation %s -o - | FileCheck %s --- | target triple = "aarch64-unknown-linux-gnu" @@ -22,6 +22,90 @@ %4 = call i32 @bar() br label %end } + + define void @x16_used_cold_to_hot() { + entry: + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + %cmp = icmp eq i64 %x16, 0 + br i1 %cmp, label %iffalse, label %iftrue + + iftrue: ; preds = %entry + call void asm sideeffect ".space 4", ""() + br label %iffalse + + iffalse: ; preds = %iftrue, %entry + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + ret void + } + + define void @all_used_cold_to_hot() { + entry: + %x0 = call i64 asm sideeffect "mov x0, 1", "={x0}"() + %x1 = call i64 asm sideeffect "mov x1, 1", "={x1}"() + %x2 = call i64 asm sideeffect "mov x2, 1", "={x2}"() + %x3 = call i64 asm sideeffect "mov x3, 1", "={x3}"() + %x4 = call 
i64 asm sideeffect "mov x4, 1", "={x4}"() + %x5 = call i64 asm sideeffect "mov x5, 1", "={x5}"() + %x6 = call i64 asm sideeffect "mov x6, 1", "={x6}"() + %x7 = call i64 asm sideeffect "mov x7, 1", "={x7}"() + %x8 = call i64 asm sideeffect "mov x8, 1", "={x8}"() + %x9 = call i64 asm sideeffect "mov x9, 1", "={x9}"() + %x10 = call i64 asm sideeffect "mov x10, 1", "={x10}"() + %x11 = call i64 asm sideeffect "mov x11, 1", "={x11}"() + %x12 = call i64 asm sideeffect "mov x12, 1", "={x12}"() + %x13 = call i64 asm sideeffect "mov x13, 1", "={x13}"() + %x14 = call i64 asm sideeffect "mov x14, 1", "={x14}"() + %x15 = call i64 asm sideeffect "mov x15, 1", "={x15}"() + %x17 = call i64 asm sideeffect "mov x17, 1", "={x17}"() + %x18 = call i64 asm sideeffect "mov x18, 1", "={x18}"() + %x19 = call i64 asm sideeffect "mov x19, 1", "={x19}"() + %x20 = call i64 asm sideeffect "mov x20, 1", "={x20}"() + %x21 = call i64 asm sideeffect "mov x21, 1", "={x21}"() + %x22 = call i64 asm sideeffect "mov x22, 1", "={x22}"() + %x23 = call i64 asm sideeffect "mov x23, 1", "={x23}"() + %x24 = call i64 asm sideeffect "mov x24, 1", "={x24}"() + %x25 = call i64 asm sideeffect "mov x25, 1", "={x25}"() + %x26 = call i64 asm sideeffect "mov x26, 1", "={x26}"() + %x27 = call i64 asm sideeffect "mov x27, 1", "={x27}"() + %x28 = call i64 asm sideeffect "mov x28, 1", "={x28}"() + br label %cold + + cold: ; preds = %entry + %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"() + br label %exit + + exit: ; preds = %cold + call void asm sideeffect "# reg use $0", "{x0}"(i64 %x0) + call void asm sideeffect "# reg use $0", "{x1}"(i64 %x1) + call void asm sideeffect "# reg use $0", "{x2}"(i64 %x2) + call void asm sideeffect "# reg use $0", "{x3}"(i64 %x3) + call void asm sideeffect "# reg use $0", "{x4}"(i64 %x4) + call void asm sideeffect "# reg use $0", "{x5}"(i64 %x5) + call void asm sideeffect "# reg use $0", "{x6}"(i64 %x6) + call void asm sideeffect "# reg use $0", "{x7}"(i64 %x7) + call void asm 
sideeffect "# reg use $0", "{x8}"(i64 %x8) + call void asm sideeffect "# reg use $0", "{x9}"(i64 %x9) + call void asm sideeffect "# reg use $0", "{x10}"(i64 %x10) + call void asm sideeffect "# reg use $0", "{x11}"(i64 %x11) + call void asm sideeffect "# reg use $0", "{x12}"(i64 %x12) + call void asm sideeffect "# reg use $0", "{x13}"(i64 %x13) + call void asm sideeffect "# reg use $0", "{x14}"(i64 %x14) + call void asm sideeffect "# reg use $0", "{x15}"(i64 %x15) + call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16) + call void asm sideeffect "# reg use $0", "{x17}"(i64 %x17) + call void asm sideeffect "# reg use $0", "{x18}"(i64 %x18) + call void asm sideeffect "# reg use $0", "{x19}"(i64 %x19) + call void asm sideeffect "# reg use $0", "{x20}"(i64 %x20) + call void asm sideeffect "# reg use $0", "{x21}"(i64 %x21) + call void asm sideeffect "# reg use $0", "{x22}"(i64 %x22) + call void asm sideeffect "# reg use $0", "{x23}"(i64 %x23) + call void asm sideeffect "# reg use $0", "{x24}"(i64 %x24) + call void asm sideeffect "# reg use $0", "{x25}"(i64 %x25) + call void asm sideeffect "# reg use $0", "{x26}"(i64 %x26) + call void asm sideeffect "# reg use $0", "{x27}"(i64 %x27) + call void asm sideeffect "# reg use $0", "{x28}"(i64 %x28) + ret void + } attributes #0 = { nounwind } @@ -73,3 +157,259 @@ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp ... +--- +name: x16_used_cold_to_hot +tracksRegLiveness: true +liveins: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: x16_used_cold_to_hot + ; COM: Check that unconditional branches from the cold section to + ; COM: the hot section manually insert indirect branches if x16 + ; COM: isn't available but there is still a free register. 
+ ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: TBZW killed renamable $w8, 0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $x16 + ; CHECK: B %bb.4 + ; CHECK: bb.1.iffalse: + ; CHECK-NEXT: liveins: $x16 + ; CHECK: killed $x16 + ; CHECK: RET undef $lr + ; CHECK: bb.4.iftrue (bbsections Cold): + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp, $x16 = LDRXpost $sp, 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.iftrue (bbsections Cold): + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &".space 4", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.iftrue (bbsections Cold): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $[[SCAVENGED_REGISTER:x[0-9]+]] = ADRP target-flags(aarch64-page) + ; CHECK-NEXT: $[[SCAVENGED_REGISTER]] = ADDXri $[[SCAVENGED_REGISTER]], target-flags(aarch64-pageoff, aarch64-nc) , 0 + ; CHECK-NEXT: BR $[[SCAVENGED_REGISTER]] + + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + $sp = frame-setup SUBXri $sp, 16, 0 + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + dead renamable $x8 = SUBSXri $x16, 0, 0, implicit-def $nzcv + renamable $w8 = CSINCWr $wzr, $wzr, 1, implicit killed $nzcv + TBZW killed renamable $w8, 0, %bb.1 + + B %bb.2 + + bb.1.iffalse: + liveins: $x16 + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + $sp = frame-destroy ADDXri $sp, 16, 0 + RET undef $lr + + bb.2.iftrue (bbsections Cold): + successors: %bb.1(0x80000000) + liveins: $x16 + + INLINEASM &".space 4", 1 /* sideeffect attdialect */ + B %bb.1 +... 
+--- +name: all_used_cold_to_hot +tracksRegLiveness: true +frameInfo: + stackSize: 112 + maxAlignment: 16 +stack: + - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x20', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x21', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x22', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x23', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x24', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x25', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x26', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', 
debug-info-location: '' } + - { id: 8, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x27', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 9, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$x28', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 10, name: '', type: spill-slot, offset: -96, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: all_used_cold_to_hot + ; COM: Check that unconditional branches from the cold section to + ; COM: the hot section spill x16 and defer indirect branch + ; COM: insertion to the linker if there are no free general-purpose + ; COM: registers. 
+ ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 + ; CHECK-COUNT-29: INLINEASM &"mov + ; CHECK-NEXT: {{ $}} + ; CHECK: bb.3.entry: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $fp, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, + ; CHECK-SAME: $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, + ; CHECK-SAME: $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.exit: + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, + ; CHECK-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, + ; CHECK-SAME: $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-NEXT: {{ $}} + ; CHECK-COUNT-30: INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed + ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.exit: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp, $x16 = LDRXpost $sp, 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.exit: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cold (bbsections Cold): + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, + ; CHECK-SAME: $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, + ; CHECK-SAME: $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.cold (bbsections Cold): + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: liveins: $fp, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, + ; CHECK-SAME: $x9, $x10, $x11, $x12, $x13, $x14, 
$x15, $x16, $x17, $x18, + ; CHECK-SAME: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = STRXpre $x16, $sp, -16 + ; CHECK-NEXT: B %bb.6 + + bb.0.entry: + successors: %bb.2(0x80000000) + liveins: $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STRXui killed $fp, $sp, 2 :: (store (s64) into %stack.10) + frame-setup STPXi killed $x28, killed $x27, $sp, 4 :: (store (s64) into %stack.9), (store (s64) into %stack.8) + frame-setup STPXi killed $x26, killed $x25, $sp, 6 :: (store (s64) into %stack.7), (store (s64) into %stack.6) + frame-setup STPXi killed $x24, killed $x23, $sp, 8 :: (store (s64) into %stack.5), (store (s64) into %stack.4) + frame-setup STPXi killed $x22, killed $x21, $sp, 10 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + frame-setup STPXi killed $x20, killed $x19, $sp, 12 :: (store (s64) into %stack.1), (store (s64) into %stack.0) + INLINEASM &"mov x0, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x0 + INLINEASM &"mov x1, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x1 + INLINEASM &"mov x2, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x2 + INLINEASM &"mov x3, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x3 + INLINEASM &"mov x4, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x4 + INLINEASM &"mov x5, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x5 + INLINEASM &"mov x6, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x6 + INLINEASM &"mov x7, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x7 + INLINEASM &"mov x8, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x8 + INLINEASM &"mov x9, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x9 + INLINEASM &"mov x10, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, 
implicit-def $x10 + INLINEASM &"mov x11, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x11 + INLINEASM &"mov x12, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x12 + INLINEASM &"mov x13, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x13 + INLINEASM &"mov x14, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x14 + INLINEASM &"mov x15, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x15 + INLINEASM &"mov x17, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x17 + INLINEASM &"mov x18, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x18 + INLINEASM &"mov x19, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x19 + INLINEASM &"mov x20, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x20 + INLINEASM &"mov x21, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x21 + INLINEASM &"mov x22, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x22 + INLINEASM &"mov x23, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x23 + INLINEASM &"mov x24, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x24 + INLINEASM &"mov x25, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x25 + INLINEASM &"mov x26, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x26 + INLINEASM &"mov x27, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x27 + INLINEASM &"mov x28, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x28 + INLINEASM &"mov fp, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $fp + B %bb.2 + + + bb.1.exit: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x0 + INLINEASM 
&"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x1 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x2 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x3 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x4 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x5 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x6 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x7 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x8 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x9 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x10 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x11 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x12 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x13 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x14 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x15 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x17 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x18 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x19 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x20 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x21 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x22 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed 
$x23 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x24 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x25 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x26 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x27 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x28 + INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $fp + $x20, $x19 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + $x22, $x21 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + $x24, $x23 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.5), (load (s64) from %stack.4) + $x26, $x25 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.7), (load (s64) from %stack.6) + $x28, $x27 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8) + $fp = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.10) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr + + bb.2.cold (bbsections Cold): + successors: %bb.1(0x80000000) + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + + INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16 + B %bb.1 + +...