diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -32,10 +32,12 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdint>
+#include <functional>
 #include <iterator>
 #include <limits>
 
@@ -51,6 +53,9 @@
 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
 
+DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
+              "Controls which pairs are considered for renaming");
+
 // The LdStLimit limits how far we search for load/store pairs.
 static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                    cl::init(20), cl::Hidden);
@@ -76,6 +81,11 @@
   // to be extended, 0 means I, and 1 means the returned iterator.
   int SExtIdx = -1;
 
+  // If not none, RenameReg can be used to rename the register stored by the
+  // first store in a pair. Currently this only works when merging stores
+  // forward.
+  Optional<MCPhysReg> RenameReg = None;
+
   LdStPairFlags() = default;
 
   void setMergeForward(bool V = true) { MergeForward = V; }
@@ -83,6 +93,11 @@
 
   void setSExtIdx(int V) { SExtIdx = V; }
   int getSExtIdx() const { return SExtIdx; }
+
+  // Accessors for the optional rename register (see RenameReg above).
+  void setRenameReg(MCPhysReg R) { RenameReg = R; }
+  void clearRenameReg() { RenameReg = None; }
+  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
 };
 
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -99,6 +114,7 @@
 
   // Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits; + LiveRegUnits DefinedInBB; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); @@ -599,8 +613,8 @@ } } -static const MachineOperand &getLdStRegOp(const MachineInstr &MI, - unsigned PairedRegOp = 0) { +static MachineOperand &getLdStRegOp(MachineInstr &MI, + unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; return MI.getOperand(Idx); @@ -783,6 +797,67 @@ return NextI; } +// Apply Fn to all instructions between MI and the beginning of the block, until a def for DefReg is reached. +// Returns true, iff Fn returns true for all visited instructions. +static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, + const TargetRegisterInfo *TRI, + std::function &Fn) { + auto MBB = MI.getParent(); + for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(), E = MBB->rend(); I != E; I++) { + bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) { + return MOP.isReg() && MOP.isDef() && + TRI->regsOverlap(MOP.getReg(), DefReg); + }); + if (!Fn(*I, isDef)) + return false; + if (isDef) + break; + } + return true; +} + +// Apply Fn to all operands of MI that are physical, non-constant registers. +static void forAllPhysicalRegs(MachineInstr &MI, const TargetRegisterInfo *TRI, + std::function Fn) { + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (!O->isReg()) + continue; + Register Reg = O->getReg(); + if (!Reg.isPhysical() || TRI->isConstantPhysReg(Reg)) + continue; + Fn(*O); + } +} + +// Apply Fn to all sub and super registers of Reg and Reg itself. Exit early, if +// the result of Fn evaluates to true and return the result. 
+template +RetT anySubOrSuperReg(MCPhysReg Reg, const TargetRegisterInfo *TRI, + std::function Fn) { + for (MCSubRegIterator Subs(Reg, TRI); Subs.isValid(); ++Subs) { + if (auto R = Fn(*Subs)) + return R; + } + for (MCSuperRegIterator Super(Reg, TRI); Super.isValid(); ++Super) { + if (auto R = Fn(*Super)) + return R; + } + return Fn(Reg); +} + +static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, + const TargetRegisterInfo *TRI) { + + forAllPhysicalRegs(MI, TRI, [&Units](const MachineOperand &MOP) { + if (MOP.isKill()) + Units.removeReg(MOP.getReg()); + }); + forAllPhysicalRegs(MI, TRI, [&Units](const MachineOperand &MOP) { + if (!MOP.isKill()) + Units.addReg(MOP.getReg()); + }); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -803,6 +878,70 @@ int OffsetStride = IsUnscaled ? getMemScale(*I) : 1; bool MergeForward = Flags.getMergeForward(); + + Optional RenameReg = Flags.getRenameReg(); + if (MergeForward && RenameReg) { + MCRegister RegToRename = getLdStRegOp(*I).getReg(); + DefinedInBB.addReg(*RenameReg); + + // Return the sub/super register for RenameReg, matching the size of OriginalReg. + auto GetMatchingSubReg = [this, RenameReg](MCPhysReg OriginalReg) { + std::function(MCPhysReg)> M = + [OriginalReg, this](MCPhysReg R) -> Optional { + if (TRI->getMinimalPhysRegClass(OriginalReg) == + TRI->getMinimalPhysRegClass(R)) + return {R}; + return None; + }; + return *anySubOrSuperReg(*RenameReg, TRI, M); + }; + + std::function UpdateMIs = + [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { + if (IsDef) { + bool SeenDef = false; + for (auto &MOP : MI.operands()) { + // Rename the first explicit definition and all implicit definitions matching RegToRename. 
+ if (MOP.isReg() && + (!SeenDef || (MOP.isDef() && MOP.isImplicit())) && + TRI->regsOverlap(MOP.getReg(), RegToRename)) { + assert((MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber())) && + "Need renamable operands"); + MOP.setReg(GetMatchingSubReg(MOP.getReg())); + SeenDef = true; + } + } + } else { + for (auto &MOP : MI.operands()) { + if (MOP.isReg() && TRI->regsOverlap(MOP.getReg(), RegToRename)) { + assert(MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber()) && + "Need renamable operands"); + MOP.setReg(GetMatchingSubReg(MOP.getReg())); + } + } + } + LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n"); + return true; + }; + forAllMIsUntilDef(*I, RegToRename, TRI, UpdateMIs); + + // Make sure the register used for renaming is not used between the paired + // instructions. That would trash the content before the new paired + // instruction. + for (auto &MI : + iterator_range>( + std::next(I), std::next(Paired))) + assert(all_of(MI.operands(), + [this, &RenameReg](const MachineOperand &MOP) { + return !MOP.isReg() || + !TRI->regsOverlap(MOP.getReg(), *RenameReg); + }) && + "Rename register used between paired instruction, trashing the " + "content"); + } + // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -931,6 +1070,12 @@ } LLVM_DEBUG(dbgs() << "\n"); + if (MergeForward) + forAllPhysicalRegs(*I, TRI, [this](const MachineOperand &MOP) { + if (MOP.isKill()) + DefinedInBB.addReg(MOP.getReg()); + }); + // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); @@ -1207,6 +1352,141 @@ // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? 
} +static bool +canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, + SmallPtrSetImpl &RequiredClasses, + const TargetRegisterInfo *TRI) { + if (!FirstMI.mayStore()) + return false; + + // Check if we can find an unused register which we can use to rename + // the register used by the first load/store. + auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); + MachineFunction &MF = *FirstMI.getParent()->getParent(); + if (!RegClass || !MF.getRegInfo().tracksLiveness()) + return false; + + auto RegToRename = getLdStRegOp(FirstMI).getReg(); + // For now, we only rename if the store operand gets killed at the store. + if (!getLdStRegOp(FirstMI).isKill() && + !any_of(FirstMI.operands(), + [TRI, RegToRename](const MachineOperand &MOP) { + return MOP.isReg() && MOP.isImplicit() && MOP.isKill() && + TRI->regsOverlap(RegToRename, MOP.getReg()); + })) { + LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n"); + return false; + } + auto canRenameMOP = [](const MachineOperand &MOP) { + return MOP.isImplicit() || + (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); + }; + + bool FoundDef = false; + + // For each instruction between FirstMI and the previous def for RegToRename, we + // * check if we can rename RegToRename in this instruction + // * collect the registers used and required register classes for RegToRename. + std::function CheckMIs = [&](MachineInstr &MI, + bool IsDef) { + LLVM_DEBUG(dbgs() << "Checking " << MI << "\n"); + // Currently we do not try to rename across frame-setup instructions. + if (MI.getFlag(MachineInstr::FrameSetup)) { + LLVM_DEBUG( + dbgs() << " Cannot rename framesetup instructions currently (" + << MI << ")\n"); + return false; + } + + UsedInBetween.accumulate(MI); + + // For a definition, check that we can rename the definition and exit the + // loop. + FoundDef = IsDef; + + // For defs, check if we can rename the first def of RegToRename. 
+ if (FoundDef) { + for (auto &MOP : MI.operands()) { + if (!MOP.isReg() || !MOP.isDef() || + !TRI->regsOverlap(MOP.getReg(), RegToRename)) + continue; + if (!canRenameMOP(MOP)) { + LLVM_DEBUG(dbgs() + << " Cannot rename " << MOP << " in " << MI << "\n"); + return false; + } + RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); + } + return true; + } else { + for (auto &MOP : MI.operands()) { + if (!MOP.isReg() || !TRI->regsOverlap(MOP.getReg(), RegToRename)) + continue; + + if (!canRenameMOP(MOP)) { + LLVM_DEBUG(dbgs() + << " Cannot rename " << MOP << " in " << MI << "\n"); + return false; + } + RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); + } + } + return true; + }; + + if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, CheckMIs)) + return false; + + if (!FoundDef) { + LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n"); + return false; + } + return true; +} + +// Check if we can find a physical register for renaming. This register must: +// * not be defined up to FirstMI (checking DefinedInBB) +// * not used between the MI and the defining instruction of the register to rename (checked using UsedInBetween). +// * is available in all used register classes (checked using RequiredClasses). 
+static Optional tryToFindRegisterToRename( + MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB, + LiveRegUnits &UsedInBetween, + SmallPtrSetImpl &RequiredClasses, + const TargetRegisterInfo *TRI) { + auto HasMatchingSubReg = [TRI](MCPhysReg Reg, const TargetRegisterClass *C) { + std::function M = [C, TRI](MCPhysReg R) { + return C == TRI->getMinimalPhysRegClass(R); + }; + return anySubOrSuperReg(Reg, TRI, M); + }; + + auto &MF = *FirstMI.getParent()->getParent(); + auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) { + std::function M = [&MF, TRI](MCPhysReg R) { + return TRI->isCalleeSavedPhysReg(R, MF); + }; + return anySubOrSuperReg(PR, TRI, M); + }; + + auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); + for (const MCPhysReg &PR : *RegClass) { + if (DefinedInBB.available(PR) && UsedInBetween.available(PR) && + !AnySubOrSuperRegCalleePreserved(PR) && + all_of(RequiredClasses, + [PR, HasMatchingSubReg](const TargetRegisterClass *C) { + return HasMatchingSubReg(PR, C); + })) { + DefinedInBB.addReg(PR); + LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI) + << "\n"); + return {PR}; + } + } + LLVM_DEBUG(dbgs() << "No rename register found from " + << TRI->getRegClassName(RegClass) << "\n"); + return None; +} + /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. MachineBasicBlock::iterator @@ -1215,6 +1495,7 @@ bool FindNarrowMerge) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; + MachineBasicBlock::iterator MBBIWithRenameReg; MachineInstr &FirstMI = *I; ++MBBI; @@ -1226,6 +1507,13 @@ int OffsetStride = IsUnscaled ? 
getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); + Optional MaybeCanRename = None; + SmallPtrSet RequiredClasses; + LiveRegUnits UsedInBetween; + UsedInBetween.init(*TRI); + + Flags.clearRenameReg(); + // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); @@ -1237,6 +1525,8 @@ for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; + UsedInBetween.accumulate(MI); + // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) @@ -1329,7 +1619,9 @@ !(MI.mayLoad() && !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && !mayAlias(MI, MemInsns, AA)) { + Flags.setMergeForward(false); + Flags.clearRenameReg(); return MBBI; } @@ -1337,18 +1629,41 @@ // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. 
-        if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) &&
-            !(MayLoad &&
+        if (!(MayLoad &&
               !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
             !mayAlias(FirstMI, MemInsns, AA)) {
-          Flags.setMergeForward(true);
-          return MBBI;
+          // No renaming is needed if the register is not modified in between.
+          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
+            Flags.setMergeForward(true);
+            Flags.clearRenameReg();
+            return MBBI;
+          }
+          // Otherwise, try to rename the register of FirstMI to allow pairing.
+          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+            if (!MaybeCanRename)
+              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
+                                                 RequiredClasses, TRI)};
+
+            if (*MaybeCanRename) {
+              Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+                  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
+                  TRI);
+              if (MaybeRenameReg) {
+                Flags.setRenameReg(*MaybeRenameReg);
+                Flags.setMergeForward(true);
+                MBBIWithRenameReg = MBBI;
+              }
+            }
+          }
         }
         // Unable to combine these instructions due to interference in between.
         // Keep looking.
       }
     }
 
+    if (Flags.getRenameReg())
+      return MBBIWithRenameReg;
+
     // If the instruction wasn't a matching load or store. Stop searching if we
     // encounter a call instruction that might modify memory.
     if (MI.isCall())
@@ -1680,7 +1995,13 @@
       ++NumUnscaledPairCreated;
     // Keeping the iterator straight is a pain, so we let the merge routine tell
     // us what the next instruction is after it's done mucking about.
+    auto Prev = std::prev(MBBI);
     MBBI = mergePairedInsns(MBBI, Paired, Flags);
+    // Collect liveness info for instructions between Prev and the new position
+    // MBBI.
+    for (auto I = std::next(Prev); I != MBBI; ++I)
+      updateDefinedRegisters(*I, DefinedInBB, TRI);
+
     return true;
   }
   return false;
@@ -1786,8 +2107,18 @@
   //        ldr x1, [x2, #8]
   //        ; becomes
   //        ldp x0, x1, [x2]
+
+  // Start from the block's live-ins when liveness is tracked.
+  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
+    DefinedInBB.clear();
+    DefinedInBB.addLiveIns(MBB);
+  }
+
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
+    // Track currently live registers up to this point, to help with
+    // searching for a rename register on demand.
+    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
     if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
       Modified = true;
     else
@@ -1825,11 +2156,12 @@
   // or store.
   ModifiedRegUnits.init(*TRI);
   UsedRegUnits.init(*TRI);
+  DefinedInBB.init(*TRI);
 
   bool Modified = false;
   bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
   for (auto &MBB : Fn)
     Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
 
   return Modified;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -392,10 +392,8 @@
 define i32 @caller43() #3 {
 entry:
 ; CHECK-LABEL: caller43
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
-; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
-; CHECK-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-DAG: stp q1, q0, [sp, #32]
+; CHECK-DAG: stp q1, q0, [sp]
 ; CHECK: add x1, sp, #32
 ; CHECK: mov x2, sp
 ; Space for s1 is allocated at sp+32
@@ -434,10 +432,8 @@
 ; CHECK-LABEL: caller43_stack
 ; CHECK: sub sp, sp, #112
 ; CHECK: add x29, sp, #96
-; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16] -; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] -; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] -; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stp q1, q0, [x29, #-32] +; CHECK-DAG: stp q1, q0, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll --- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -26,11 +26,11 @@ ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] + ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[GRVR:x[0-9]+]], #-56 ; CHECK: movk [[GRVR]], #65408, lsl #32 @@ -62,11 +62,10 @@ ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40 ; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -4,7 +4,7 @@ ; CHECK-SAME: Bytes from outlining all occurrences (16) >= ; CHECK-SAME: Unoutlined instruction bytes (16) ; CHECK-SAME: (Also found at: ) -; CHECK: remark: :0:0: Saved 48 bytes by outlining 14 instructions +; CHECK: remark: :0:0: Saved 36 bytes by 
outlining 11 instructions ; CHECK-SAME: from 2 locations. (Found at: , ; CHECK-SAME: ) ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml @@ -38,10 +38,10 @@ ; YAML-NEXT: Function: OUTLINED_FUNCTION_0 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Saved ' -; YAML-NEXT: - OutliningBenefit: '48' +; YAML-NEXT: - OutliningBenefit: '36' ; YAML-NEXT: - String: ' bytes by ' ; YAML-NEXT: - String: 'outlining ' -; YAML-NEXT: - Length: '14' +; YAML-NEXT: - Length: '11' ; YAML-NEXT: - String: ' instructions ' ; YAML-NEXT: - String: 'from ' ; YAML-NEXT: - NumOccurrences: '2' diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.ll b/llvm/test/CodeGen/AArch64/machine-outliner.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner.ll @@ -91,19 +91,16 @@ ; ODR: [[OUTLINED]]: ; CHECK: .p2align 2 ; CHECK-NEXT: [[OUTLINED]]: -; CHECK: mov w8, #1 -; CHECK-NEXT: str w8, [sp, #28] -; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: str w8, [sp, #24] -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: str w8, [sp, #20] -; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: str w8, [sp, #16] -; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: str w8, [sp, #12] -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: str w8, [sp, #8] -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret +; CHECK: mov w9, #1 +; CHECK-DAG: mov w8, #2 +; CHECK-DAG: stp w8, w9, [sp, #24] +; CHECK-DAG: mov w9, #3 +; CHECK-DAG: mov w8, #4 +; CHECK-DAG: stp w8, w9, [sp, #16] +; CHECK-DAG: mov w9, #5 +; CHECK-DAG: mov w8, #6 +; CHECK-DAG: stp w8, w9, [sp, #8] +; CHECK-DAG: add sp, sp, #32 +; CHECK-DAG: ret attributes #0 = { noredzone "target-cpu"="cyclone" "target-features"="+sse" } diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -0,0 +1,471 @@ +# RUN: llc 
-run-pass=aarch64-ldst-opt -mtriple=arm64-apple-iphoneos -verify-machineinstrs -o - %s | FileCheck %s + +--- +# CHECK-LABEL: name: test1 +# CHECK: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 +# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 +# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr + +name: test1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) + STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) + STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x8 = ADDXrr $x8, $x8 + STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + RET undef $lr + +... 
+--- +# CHECK-LABEL: name: test2 +# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0, $x9, $x1 + +# CHECK: $x10, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 +# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr + +name: test2 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x9' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x9, $x1 + renamable $x9, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8) + STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) + STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x8 = ADDXrr $x8, $x8 + STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + RET undef $lr + +... +--- +# MOVK has a tied operand and we currently do not rename across tied defs. 
+# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0 +# +# CHECK: renamable $x8 = MRS 58880 +# CHECK-NEXT: renamable $x8 = MOVZXi 15309, 0 +# CHECK-NEXT: renamable $x8 = MOVKXi renamable $x8, 26239, 16 +# CHECK-NEXT: STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8) +# CHECK-NEXT: renamable $x8 = MRS 55840 +# CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 1, implicit killed $x8 :: (store 8) +# CHECK-NEXT: RET undef $lr +# +name: test3 +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0 + + renamable $x8 = MRS 58880 + renamable $x8 = MOVZXi 15309, 0 + renamable $x8 = MOVKXi renamable $x8, 26239, 16 + STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8) + renamable $x8 = MRS 55840 + STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 8) + RET undef $lr + +... +--- +# CHECK-LABEL: name: test4 +# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 + +# CHECK: $x9 = MRS 58880 +# CHECK-NEXT: renamable $x8 = MRS 55840 +# CHECK-NEXT: STPXi $x9, killed renamable $x8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: RET undef $lr + +name: test4 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + renamable $x8 = MRS 58880 + STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 4) + renamable $x8 = MRS 55840 + STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + RET undef $lr + +... 
+--- +# CHECK-LABEL: name: test5 +# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 + +# CHECK: $x9 = MRS 58880 +# CHECK-NEXT: renamable $x8 = MRS 55840 +# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: RET undef $lr + +name: test5 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + renamable $x8 = MRS 58880 + STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) + renamable $x8 = MRS 55840 + STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + RET undef $lr + +... +--- +# CHECK-LABEL: name: test6 +# CHECK-LABEL: bb.0: +# CHECK: liveins: $x0, $x1, $q3 + +# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16) +# CHECK-NEXT: renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) +# CHECK-NEXT: renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4) +# CHECK-NEXT: RET undef $lr + +# XTN has a tied use-def. +name: test6 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } + - { reg: '$q3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $q3 + renamable $q9 = LDRQui $x0, 0 :: (load 16) + renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 + STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) + renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 + STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4) + RET undef $lr + +... +--- +# Currently we do not rename across frame-setup instructions. 
+# CHECK-LABEL: name: test7 +# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 + +# CHECK: $sp = frame-setup SUBXri $sp, 64, 0 +# CHECK-NEXT: renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8) +# CHECK-NEXT: STRXui renamable $x9, $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) +# CHECK-NEXT: STRXui renamable $x9, $x0, 11 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr +# +name: test7 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + stackSize: 64 + maxAlignment: 16 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, type: spill-slot, offset: -48, size: 16, alignment: 16 } + - { id: 1, type: spill-slot, offset: -64, size: 16, alignment: 16 } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + $sp = frame-setup SUBXri $sp, 64, 0 + renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8) + STRXui renamable $x9, $x0, 10 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) + STRXui renamable $x9, $x0, 11 :: (store 8, align 4) + RET undef $lr +... 
+--- +# CHECK-LABEL: name: test8 +# CHECK-LABEL: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 + +# CHECK: renamable $x8 = MRS 58880 +# CHECK-NEXT: $w9 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x9 +# CHECK-NEXT: renamable $x8 = MRS 55840 +# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: RET undef $lr + +name: test8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + + renamable $x8 = MRS 58880 + renamable $w8 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x8 + STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) + renamable $x8 = MRS 55840 + STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + RET undef $lr + +... +--- +# The reg class returned for $q9 contains only the first 16 Q registers. +# TODO: Can we check that all instructions that require renaming also support +# the second 16 Q registers? 
+# CHECK-LABEL: name: test9 +# CHECK-LABEL: bb.0: +# CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + +# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16) +# CHECK-NEXT: STRQui killed renamable $q9, renamable $x0, 10 :: (store 16, align 4) +# CHECK: renamable $q9 = LDRQui $x0, 1 :: (load 16) +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) +# CHECK-NEXT: RET undef $lr + +name: test9 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } + - { reg: '$q3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + renamable $q9 = LDRQui $x0, 0 :: (load 16) + STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4) + renamable $q9 = LDRQui $x0, 1 :: (load 16) + STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) + RET undef $lr + +... +--- +# The livein $q7 is killed early, so we can re-use it for renaming. 
+# CHECK-LABEL: name: test10 +# CHECK-LABEL: bb.0: +# CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + +# CHECK: renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 +# CHECK-NEXT: STRQui killed renamable $q7, renamable $x0, 100 :: (store 16, align 4) +# CHECK-NEXT: $q7 = LDRQui $x0, 0 :: (load 16) +# CHECK-NEXT: renamable $q9 = LDRQui $x0, 1 :: (load 16) +# CHECK-NEXT: STPQi killed renamable $q9, killed $q7, renamable $x0, 10 :: (store 16, align 4) +# CHECK-NEXT: RET undef $lr + +name: test10 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } + - { reg: '$q3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 + renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 + STRQui renamable killed $q7, renamable $x0, 100 :: (store 16, align 4) + renamable $q9 = LDRQui $x0, 0 :: (load 16) + STRQui renamable killed $q9, renamable $x0, 11 :: (store 16, align 4) + renamable $q9 = LDRQui $x0, 1 :: (load 16) + STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4) + RET undef $lr + +... 
+--- +# Make sure we do not use any registers that are defined between paired candidates +# ($x14 in this example) +# CHECK-LABEL: name: test11 +# CHECK: bb.0: +# CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13 + +# CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load 8) +# CHECK-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) +# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) +# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store 8, align 4) +# CHECK-NEXT: renamable $w8 = ADDWrr $w10, $w10 +# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr +# +name: test11 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x11, $x12, $x13 + renamable $w10 = LDRWui renamable $x0, 0 :: (load 8) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) + STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) + renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) + STRXui renamable $x9, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable killed $x14, renamable $x0, 200 :: (store 8, align 4) + renamable $w8 = ADDWrr $w10, $w10 + STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) + RET undef $lr + +... +--- +# Check that we correctly deal with killed registers in stores that get merged forward, +# which extends the live range of the first store operand. 
+# CHECK-LABEL: name: test12 +# CHECK: bb.0: +# CHECK-NEXT: liveins: $x0, $x1 +# +# CHECK: renamable $x10 = LDRXui renamable $x0, 0 :: (load 8) +# CHECK-NEXT: $x11, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) +# CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 +# CHECK-NEXT: STPXi renamable $x8, killed $x11, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STPXi killed renamable $x10, renamable $x9, renamable $x0, 20 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr + +name: test12 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1 + renamable $x10 = LDRXui renamable $x0, 0 :: (load 8) + STRXui renamable killed $x10, renamable $x0, 20 :: (store 8, align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8) + STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) + renamable $x8 = ADDXrr $x8, $x8 + STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x9, renamable $x0, 21 :: (store 8, align 4) + RET undef $lr + +... +--- +# Make sure we do not use any registers that are defined between def to rename and the first +# paired store. 
($x14 in this example) +# CHECK-LABEL: name: test13 +# CHECK: bb.0: +# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 +# CHECK: $x15, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) +# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load 8) +# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) +# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: RET undef $lr +# +name: test13 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x8' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x0, $x1, $x10, $x11, $x12, $x13 + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) + renamable $x14 = LDRXui renamable $x0, 4 :: (load 8) + STRXui renamable killed $x14, renamable $x0, 100 :: (store 8, align 4) + STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) + STRXui renamable $x9, renamable $x0, 10 :: (store 8) + RET undef $lr + +...