diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -12,19 +12,40 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "AArch64RegisterInfo.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <cassert>
 
 #define DEBUG_TYPE "aarch64-post-select-optimize"
 
 using namespace llvm;
 
+static cl::opt<bool>
+    EnableFIOpts("aarch64-gisel-fi-opts",
+                 cl::desc("Enable GlobalISel frame-index optimizations"),
+                 cl::init(true), cl::Hidden);
+
+static cl::opt<unsigned>
+    FIOptStackMin("aarch64-gisel-fi-opts-stack-threshold",
+                  cl::desc("GlobalISel frame-index opt stack size minimum"),
+                  cl::init(2048), cl::Hidden);
+
+static cl::opt<unsigned> FIOptMinBlockReferences(
+    "aarch64-gisel-fi-opts-min-block-refs",
+    cl::desc("GlobalISel frame-index opt minimum references / block threshold"),
+    cl::init(6), cl::Hidden);
+
 namespace {
 class AArch64PostSelectOptimize : public MachineFunctionPass {
 public:
@@ -42,7 +63,11 @@
 
 private:
   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
-};
+
+  void cseFrameIndexReferences(MachineBasicBlock &MBB,
+                               SmallVectorImpl<MachineOperand *> &Ops);
+  bool optimizeFrameIndices(MachineBasicBlock &MBB);
+};
 } // end anonymous namespace
 
 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -169,6 +194,114 @@
   return Changed;
 }
 
+static MachineInstr *findExistingFrameIndexCopy(MachineBasicBlock &MBB,
+                                                int FI) {
+  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
+    if (MI.getOpcode() != AArch64::ADDXri)
+      continue;
+    if (MI.getOperand(1).isFI() && (MI.getOperand(1).getIndex() == FI) &&
+        MI.getOperand(2).getImm() == 0 && MI.getOperand(3).getImm() == 0)
+      return &MI;
+  }
+  return nullptr;
+}
+
+void AArch64PostSelectOptimize::cseFrameIndexReferences(
+    MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand *> &Ops) {
+  assert(Ops.size() > 1);
+#ifndef NDEBUG
+  assert(std::all_of(Ops.begin(), Ops.end(), [&](MachineOperand *MOP) {
+    return MOP->isFI() && MOP->getIndex() == Ops[0]->getIndex() &&
+           MOP->getParent()->getParent() == Ops[0]->getParent()->getParent();
+  }));
+#endif
+  auto &MF = *MBB.getParent();
+  auto &MRI = MF.getRegInfo();
+
+  // We might already have an existing ADD that has a frame index reference in
+  // this block. If so, hoist it to the top and we can use it instead of
+  // generating a new one.
+  MachineInstr *FICopy = findExistingFrameIndexCopy(MBB, Ops[0]->getIndex());
+  if (FICopy) {
+    FICopy->moveBefore(&*MBB.getFirstNonPHI());
+  } else {
+    // Generate a frame index via an ADD at the beginning of the block.
+    auto RC = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+    MachineIRBuilder MIB(MBB, MBB.getFirstNonPHI());
+    auto Add = MIB.buildInstr(AArch64::ADDXri, {RC}, {});
+    Add.addFrameIndex(Ops[0]->getIndex());
+    Add.addImm(0); // For #0 with shifted imm.
+    Add.addImm(0);
+    FICopy = &*Add;
+  }
+
+  // Replace all of the frame index references with our new reg.
+  for (auto MOP : Ops)
+    MOP->ChangeToRegister(FICopy->getOperand(0).getReg(), false);
+}
+
+static bool shouldCSEFIFrom(MachineInstr &MI) {
+  return MI.mayLoadOrStore();
+}
+
+bool AArch64PostSelectOptimize::optimizeFrameIndices(MachineBasicBlock &MBB) {
+  // Frame index operands are replaced by physical registers during frame
+  // lowering. Doing so, however, may require multiple instructions to
+  // materialize the right stack pointer offset. In some cases, like G_MEMCPY
+  // expansion, we might have many memory operations using a frame index +
+  // offset addressing mode. If the object being referenced is too far from
+  // the stack pointer, we could end up generating a separate stack address
+  // computation for each memory operation.
+  //
+  // This optimization tries to mitigate the problem by essentially CSE'ing
+  // frame index operands within a block and replacing the uses with a single
+  // vreg holding the frame address.
+
+  // First, collect all operands which use a frame index.
+  SmallVector<MachineOperand *> FrameUseOps;
+  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
+    if (!shouldCSEFIFrom(MI))
+      continue;
+
+    for (auto &MOP : MI.explicit_uses())
+      if (MOP.isFI())
+        FrameUseOps.push_back(&MOP);
+  }
+
+  bool Changed = false;
+  // Now scan for multiple uses of a single FI (single uses won't benefit from
+  // CSE).
+  SmallVector<MachineOperand *> UsesOfSingleFI;
+  while (true) {
+    UsesOfSingleFI.clear();
+    for (unsigned Idx = 0; Idx < FrameUseOps.size(); ++Idx) {
+      if (FrameUseOps[Idx] == nullptr)
+        continue;
+
+      if (!UsesOfSingleFI.empty() &&
+          UsesOfSingleFI.back()->getIndex() != FrameUseOps[Idx]->getIndex())
+        continue;
+      // Record this use and null out the entry so we don't process it again.
+      UsesOfSingleFI.emplace_back(FrameUseOps[Idx]);
+      FrameUseOps[Idx] = nullptr;
+    }
+
+    if (UsesOfSingleFI.empty())
+      break; // Nothing left to process.
+    if (UsesOfSingleFI.size() < FIOptMinBlockReferences)
+      continue; // Not worth CSE'ing this FI; move on to the next one.
+    cseFrameIndexReferences(MBB, UsesOfSingleFI);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+static bool isWorthFrameIdxCSE(MachineFunction &MF) {
+  auto &MFI = MF.getFrameInfo();
+  return MFI.estimateStackSize(MF) > FIOptStackMin;
+}
+
 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
   if (MF.getProperties().hasProperty(
           MachineFunctionProperties::Property::FailedISel))
@@ -177,9 +310,15 @@
              MachineFunctionProperties::Property::Selected) &&
          "Expected a selected MF");
 
+  // Use some simple heuristics to determine if frame-index CSE'ing is worth it.
+  bool DoFICSE = EnableFIOpts && isWorthFrameIdxCSE(MF);
+
   bool Changed = false;
-  for (auto &BB : MF)
+  for (auto &BB : MF) {
     Changed |= optimizeNZCVDefs(BB);
+    if (DoFICSE)
+      Changed |= optimizeFrameIndices(BB);
+  }
   return Changed;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-frameidxcse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-frameidxcse.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-frameidxcse.mir
@@ -0,0 +1,378 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  %struct.wibble.1.19.29.31.61.63.65.69.71.79.81.85.87.105.113.125.127.129.131.152 = type { i32, i8*, i16, i16, i8, i32, i32, i16, i16, [1019 x i16], [1019 x i16], [510 x i8], [4096 x i16], [19 x i8], [256 x i16] }
+
+  define void @test_fi_cse() local_unnamed_addr #0 {
+  bb:
+    %tmp3 = alloca %struct.wibble.1.19.29.31.61.63.65.69.71.79.81.85.87.105.113.125.127.129.131.152, align 8
+    %tmp5 = alloca [1024 x i8], align 1
+    %0 = zext i8 undef to i32
+    switch i32 %0, label %common.ret [
+      i32 0, label %bb33
+      i32 1, label %bb60
+      i32 2, label %bb60
+      i32 3, label %bb60
+    ]
+
+  bb33:                                             ; preds = %bb
+    call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull undef) #3
+    br label %common.ret
+
+  bb60:                                             ; preds = %bb, %bb, %bb
+    %tmp78 = getelementptr inbounds %struct.wibble.1.19.29.31.61.63.65.69.71.79.81.85.87.105.113.125.127.129.131.152, %struct.wibble.1.19.29.31.61.63.65.69.71.79.81.85.87.105.113.125.127.129.131.152* %tmp3, i64 0, i32 11, i64 0
+    br i1 undef, label %common.ret, label %bb82
+
+  bb82:                                             ; preds = %bb60
+    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(510) %tmp78, i8 0, i64 64, i1 false) #3
+    br label %common.ret
+
+  common.ret:                                       ; preds = %bb, %bb33, %bb60, %bb82
+    ret void
+  }
+
+  define void @test_no_cse_too_few_users() {
+    ret void
+  }
+  define void @test_no_cse_too_small_stack() {
+    ret void
+  }
+  define void @test_fi_reuse_add() {
+    ret void
+  }
+
+  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
+  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+  ; Function Attrs: argmemonly nofree nounwind willreturn writeonly
+  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
+
+  attributes #0 = { "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" }
+  attributes #1 = { argmemonly nofree nosync nounwind willreturn }
+  attributes #2 = { argmemonly nofree nounwind willreturn writeonly }
+  attributes #3 = { nounwind }
+
+...
+---
+name: test_fi_cse
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, size: 13352, alignment: 8 }
+  - { id: 1, size: 1024, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: test_fi_cse
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x4cccccce), %bb.1(0x33333332)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY]], 0, %bb.3
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY1]], 0, %bb.5
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   TBNZW [[DEF]], 0, %bb.5
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
+  ; CHECK:   [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 257 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 258 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 259 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 260 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 261 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], [[ADDXri]], 262 :: (store (<2 x s64>), align 8)
+  ; CHECK: bb.5:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    successors: %bb.3(0x4cccccce), %bb.6(0x33333332)
+
+    %12:gpr32 = IMPLICIT_DEF
+    %8:gpr32 = COPY $wzr
+    TBNZW %8, 0, %bb.3
+    B %bb.6
+
+  bb.6:
+    successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+
+    %28:gpr32 = COPY $wzr
+    TBNZW %28, 0, %bb.5
+    B %bb.2
+
+  bb.2:
+    B %bb.5
+
+  bb.3:
+    TBNZW %12, 0, %bb.5
+    B %bb.4
+
+  bb.4:
+    %18:fpr128 = MOVIv2d_ns 0
+    STRQui %18, %stack.0, 257 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 258 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 259 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 260 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 261 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 262 :: (store (<2 x s64>), align 8)
+
+  bb.5:
+    RET_ReallyLR
+
+...
+---
+name: test_no_cse_too_few_users
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, size: 13352, alignment: 8 }
+  - { id: 1, size: 1024, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: test_no_cse_too_few_users
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x4cccccce), %bb.1(0x33333332)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY]], 0, %bb.3
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY1]], 0, %bb.5
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   TBNZW [[DEF]], 0, %bb.5
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 257 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 258 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 259 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 260 :: (store (<2 x s64>), align 8)
+  ; CHECK: bb.5:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    successors: %bb.3(0x4cccccce), %bb.6(0x33333332)
+
+    %12:gpr32 = IMPLICIT_DEF
+    %8:gpr32 = COPY $wzr
+    TBNZW %8, 0, %bb.3
+    B %bb.6
+
+  bb.6:
+    successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+
+    %28:gpr32 = COPY $wzr
+    TBNZW %28, 0, %bb.5
+    B %bb.2
+
+  bb.2:
+    B %bb.5
+
+  bb.3:
+    TBNZW %12, 0, %bb.5
+    B %bb.4
+
+  bb.4:
+    %18:fpr128 = MOVIv2d_ns 0
+    STRQui %18, %stack.0, 257 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 258 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 259 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 260 :: (store (<2 x s64>), align 8)
+
+  bb.5:
+    RET_ReallyLR
+
+...
+---
+name: test_no_cse_too_small_stack
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, size: 256, alignment: 8 }
+  - { id: 1, size: 64, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: test_no_cse_too_small_stack
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x4cccccce), %bb.1(0x33333332)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY]], 0, %bb.3
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY1]], 0, %bb.5
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   TBNZW [[DEF]], 0, %bb.5
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 257 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 258 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 259 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 260 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 261 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %stack.0, 262 :: (store (<2 x s64>), align 8)
+  ; CHECK: bb.5:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    successors: %bb.3(0x4cccccce), %bb.6(0x33333332)
+
+    %12:gpr32 = IMPLICIT_DEF
+    %8:gpr32 = COPY $wzr
+    TBNZW %8, 0, %bb.3
+    B %bb.6
+
+  bb.6:
+    successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+
+    %28:gpr32 = COPY $wzr
+    TBNZW %28, 0, %bb.5
+    B %bb.2
+
+  bb.2:
+    B %bb.5
+
+  bb.3:
+    TBNZW %12, 0, %bb.5
+    B %bb.4
+
+  bb.4:
+    %18:fpr128 = MOVIv2d_ns 0
+    STRQui %18, %stack.0, 257 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 258 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 259 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 260 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 261 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 262 :: (store (<2 x s64>), align 8)
+
+  bb.5:
+    RET_ReallyLR
+
+...
+---
+name: test_fi_reuse_add
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, size: 13352, alignment: 8 }
+  - { id: 1, size: 1024, alignment: 1 }
+machineFunctionInfo: {}
+body: |
+  ; CHECK-LABEL: name: test_fi_reuse_add
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x4cccccce), %bb.1(0x33333332)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY]], 0, %bb.3
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK:   TBNZW [[COPY1]], 0, %bb.5
+  ; CHECK:   B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   B %bb.5
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   TBNZW [[DEF]], 0, %bb.5
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   %add:gpr64sp = ADDXri %stack.0, 0, 0
+  ; CHECK:   [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 257 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 258 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 259 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 260 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 261 :: (store (<2 x s64>), align 8)
+  ; CHECK:   STRQui [[MOVIv2d_ns]], %add, 262 :: (store (<2 x s64>), align 8)
+  ; CHECK: bb.5:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    successors: %bb.3(0x4cccccce), %bb.6(0x33333332)
+
+    %12:gpr32 = IMPLICIT_DEF
+    %8:gpr32 = COPY $wzr
+    TBNZW %8, 0, %bb.3
+    B %bb.6
+
+  bb.6:
+    successors: %bb.2(0x40000003), %bb.5(0x3ffffffd)
+
+    %28:gpr32 = COPY $wzr
+    TBNZW %28, 0, %bb.5
+    B %bb.2
+
+  bb.2:
+    B %bb.5
+
+  bb.3:
+    TBNZW %12, 0, %bb.5
+    B %bb.4
+
+  bb.4:
+    %18:fpr128 = MOVIv2d_ns 0
+    %add:gpr64sp = ADDXri %stack.0, 0, 0
+    STRQui %18, %stack.0, 257 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 258 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 259 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 260 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 261 :: (store (<2 x s64>), align 8)
+    STRQui %18, %stack.0, 262 :: (store (<2 x s64>), align 8)
+
+  bb.5:
+    RET_ReallyLR
+
+...
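As a usage sketch (not part of the patch itself): the cl::opt flags introduced above can be exercised when running the pass standalone on the test file added here. The threshold values below are arbitrary example values, not defaults:

  llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize \
      -aarch64-gisel-fi-opts-stack-threshold=128 \
      -aarch64-gisel-fi-opts-min-block-refs=2 \
      llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-frameidxcse.mir -o -

With both thresholds lowered like this, the two negative tests (too-small stack, too few users) should also get the frame-index CSE, while passing -aarch64-gisel-fi-opts=false disables the optimization entirely.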