diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -79,10 +79,10 @@ /// Return true if the given load or store is a strided memory access. static bool isStridedAccess(const MachineInstr &MI); - /// Return true if this is an unscaled load/store. - static bool isUnscaledLdSt(unsigned Opc); - static bool isUnscaledLdSt(MachineInstr &MI) { - return isUnscaledLdSt(MI.getOpcode()); + /// Return true if it has an unscaled load/store offset. + static bool hasUnscaledLdStOffset(unsigned Opc); + static bool hasUnscaledLdStOffset(MachineInstr &MI) { + return hasUnscaledLdStOffset(MI.getOpcode()); } /// Returns the unscaled load/store for the scaled load/store opcode, @@ -95,6 +95,14 @@ return getMemScale(MI.getOpcode()); } + /// Returns whether the instruction is a pre-indexed load. + static bool isPreLd(const MachineInstr &MI); + + /// Returns whether the instruction is a pre-indexed store. + static bool isPreSt(const MachineInstr &MI); + + /// Returns whether the instruction is a pre-indexed load/store. + static bool isPreLdSt(const MachineInstr &MI); /// Returns the index for the immediate for a given instruction. 
static unsigned getLoadStoreImmIdx(unsigned Opc); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2118,22 +2118,32 @@ }); } -bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) { +bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { switch (Opc) { default: return false; case AArch64::STURSi: + case AArch64::STRSpre: case AArch64::STURDi: + case AArch64::STRDpre: case AArch64::STURQi: + case AArch64::STRQpre: case AArch64::STURBBi: case AArch64::STURHHi: case AArch64::STURWi: + case AArch64::STRWpre: case AArch64::STURXi: + case AArch64::STRXpre: case AArch64::LDURSi: + case AArch64::LDRSpre: case AArch64::LDURDi: + case AArch64::LDRDpre: case AArch64::LDURQi: + case AArch64::LDRQpre: case AArch64::LDURWi: + case AArch64::LDRWpre: case AArch64::LDURXi: + case AArch64::LDRXpre: case AArch64::LDURSWi: case AArch64::LDURHHi: case AArch64::LDURBBi: @@ -2252,15 +2262,25 @@ case AArch64::LDRSWui: // Unscaled instructions. case AArch64::STURSi: + case AArch64::STRSpre: case AArch64::STURDi: + case AArch64::STRDpre: case AArch64::STURQi: + case AArch64::STRQpre: case AArch64::STURWi: + case AArch64::STRWpre: case AArch64::STURXi: + case AArch64::STRXpre: case AArch64::LDURSi: + case AArch64::LDRSpre: case AArch64::LDURDi: + case AArch64::LDRDpre: case AArch64::LDURQi: + case AArch64::LDRQpre: case AArch64::LDURWi: + case AArch64::LDRWpre: case AArch64::LDURXi: + case AArch64::LDRXpre: case AArch64::LDURSWi: return true; } @@ -2357,20 +2377,36 @@ // Is this a candidate for ld/st merging or pairing? For example, we don't // touch volatiles or load/stores that have a hint to avoid pair formation. bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { + + bool IsPreLdSt = isPreLdSt(MI); + // If this is a volatile load/store, don't mess with it. 
if (MI.hasOrderedMemoryRef()) return false; // Make sure this is a reg/fi+imm (as opposed to an address reloc). - assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) && + // For Pre-inc LD/ST, the operand is shifted by one. + assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() || + MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) && "Expected a reg or frame index operand."); - if (!MI.getOperand(2).isImm()) + + // For Pre-indexed addressing quadword instructions, the third operand is the + // immediate value. + bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm(); + + if (!MI.getOperand(2).isImm() && !IsImmPreLdSt) return false; // Can't merge/pair if the instruction modifies the base register. // e.g., ldr x0, [x0] // This case will never occur with an FI base. - if (MI.getOperand(1).isReg()) { + // However, if the instruction is an LDR/STRpre, it can be merged. + // For example: + // ldr q0, [x11, #32]! + // ldr q1, [x11, #16] + // to + // ldp q0, q1, [x11, #32]! + if (MI.getOperand(1).isReg() && !IsPreLdSt) { Register BaseReg = MI.getOperand(1).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (MI.modifiesRegister(BaseReg, TRI)) @@ -2799,14 +2835,18 @@ return 2; case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRSpre: case AArch64::LDRSWui: case AArch64::LDURSWi: + case AArch64::LDRWpre: case AArch64::LDRWui: case AArch64::LDURWi: case AArch64::STRSui: case AArch64::STURSi: + case AArch64::STRSpre: case AArch64::STRWui: case AArch64::STURWi: + case AArch64::STRWpre: case AArch64::LDPSi: case AArch64::LDPSWi: case AArch64::LDPWi: @@ -2815,12 +2855,16 @@ return 4; case AArch64::LDRDui: case AArch64::LDURDi: + case AArch64::LDRDpre: case AArch64::LDRXui: case AArch64::LDURXi: + case AArch64::LDRXpre: case AArch64::STRDui: case AArch64::STURDi: + case AArch64::STRDpre: case AArch64::STRXui: case AArch64::STURXi: + case AArch64::STRXpre: case AArch64::LDPDi: case AArch64::LDPXi: case AArch64::STPDi: @@ -2830,7 +2874,9 @@ case 
AArch64::LDURQi: case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STRQpre: case AArch64::LDPQi: + case AArch64::LDRQpre: case AArch64::STPQi: case AArch64::STGOffset: case AArch64::STZGOffset: @@ -2841,6 +2887,36 @@ } } +bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::LDRWpre: + case AArch64::LDRXpre: + case AArch64::LDRSpre: + case AArch64::LDRDpre: + case AArch64::LDRQpre: + return true; + } +} + +bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::STRWpre: + case AArch64::STRXpre: + case AArch64::STRSpre: + case AArch64::STRDpre: + case AArch64::STRQpre: + return true; + } +} + +bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) { + return isPreLd(MI) || isPreSt(MI); +} + // Scale the unscaled offsets. Returns false if the unscaled offset can't be // scaled. static bool scaleOffset(unsigned Opc, int64_t &Offset) { @@ -2944,11 +3020,11 @@ // isCandidateToMergeOrPair guarantees that operand 2 is an immediate. int64_t Offset1 = FirstLdSt.getOperand(2).getImm(); - if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) + if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1)) return false; int64_t Offset2 = SecondLdSt.getOperand(2).getImm(); - if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) + if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2)) return false; // Pairwise instructions have a 7-bit signed offset field. 
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -248,28 +248,38 @@ return std::numeric_limits<unsigned>::max(); case AArch64::STRDui: case AArch64::STURDi: + case AArch64::STRDpre: case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STRQpre: case AArch64::STRBBui: case AArch64::STURBBi: case AArch64::STRHHui: case AArch64::STURHHi: case AArch64::STRWui: + case AArch64::STRWpre: case AArch64::STURWi: case AArch64::STRXui: + case AArch64::STRXpre: case AArch64::STURXi: case AArch64::LDRDui: case AArch64::LDURDi: + case AArch64::LDRDpre: case AArch64::LDRQui: case AArch64::LDURQi: + case AArch64::LDRQpre: case AArch64::LDRWui: case AArch64::LDURWi: + case AArch64::LDRWpre: case AArch64::LDRXui: case AArch64::LDURXi: + case AArch64::LDRXpre: case AArch64::STRSui: case AArch64::STURSi: + case AArch64::STRSpre: case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRSpre: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; @@ -304,33 +314,53 @@ case AArch64::STRSui: case AArch64::STURSi: return AArch64::STPSi; + case AArch64::STRSpre: + return AArch64::STPSpre; case AArch64::STRDui: case AArch64::STURDi: return AArch64::STPDi; + case AArch64::STRDpre: + return AArch64::STPDpre; case AArch64::STRQui: case AArch64::STURQi: return AArch64::STPQi; + case AArch64::STRQpre: + return AArch64::STPQpre; case AArch64::STRWui: case AArch64::STURWi: return AArch64::STPWi; + case AArch64::STRWpre: + return AArch64::STPWpre; case AArch64::STRXui: case AArch64::STURXi: return AArch64::STPXi; + case AArch64::STRXpre: + return AArch64::STPXpre; case AArch64::LDRSui: case AArch64::LDURSi: return AArch64::LDPSi; + case AArch64::LDRSpre: + return AArch64::LDPSpre; case AArch64::LDRDui: case AArch64::LDURDi: return AArch64::LDPDi; + case AArch64::LDRDpre: + return AArch64::LDPDpre; 
case AArch64::LDRQui: case AArch64::LDURQi: return AArch64::LDPQi; + case AArch64::LDRQpre: + return AArch64::LDPQpre; case AArch64::LDRWui: case AArch64::LDURWi: return AArch64::LDPWi; + case AArch64::LDRWpre: + return AArch64::LDPWpre; case AArch64::LDRXui: case AArch64::LDURXi: return AArch64::LDPXi; + case AArch64::LDRXpre: + return AArch64::LDPXpre; case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; @@ -539,6 +569,37 @@ } } +static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) { + + unsigned OpcA = FirstMI.getOpcode(); + unsigned OpcB = MI.getOpcode(); + + switch (OpcA) { + default: + return false; + case AArch64::STRSpre: + return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi); + case AArch64::STRDpre: + return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi); + case AArch64::STRQpre: + return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi); + case AArch64::STRWpre: + return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi); + case AArch64::STRXpre: + return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi); + case AArch64::LDRSpre: + return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi); + case AArch64::LDRDpre: + return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi); + case AArch64::LDRQpre: + return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi); + case AArch64::LDRWpre: + return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi); + case AArch64::LDRXpre: + return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi); + } +} + // Returns the scale and offset range of pre/post indexed variants of MI. static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset) { @@ -561,17 +622,20 @@ static MachineOperand &getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); - unsigned Idx = isPairedLdSt(MI) ? 
PairedRegOp : 0; + bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI); + if (IsPreLdSt) + PairedRegOp += 1; + unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0; return MI.getOperand(Idx); } static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) { - unsigned Idx = isPairedLdSt(MI) ? 2 : 1; + unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1; return MI.getOperand(Idx); } static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) { - unsigned Idx = isPairedLdSt(MI) ? 3 : 2; + unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2; return MI.getOperand(Idx); } @@ -581,10 +645,10 @@ assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); int LoadSize = TII->getMemScale(LoadInst); int StoreSize = TII->getMemScale(StoreInst); - int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst) + int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst) ? getLdStOffsetOp(StoreInst).getImm() : getLdStOffsetOp(StoreInst).getImm() * StoreSize; - int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst) + int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst) ? getLdStOffsetOp(LoadInst).getImm() : getLdStOffsetOp(LoadInst).getImm() * LoadSize; return (UnscaledStOffset <= UnscaledLdOffset) && @@ -689,7 +753,7 @@ NextI = next_nodbg(NextI, E); unsigned Opc = I->getOpcode(); - bool IsScaled = !TII->isUnscaledLdSt(Opc); + bool IsScaled = !TII->hasUnscaledLdStOffset(Opc); int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I); bool MergeForward = Flags.getMergeForward(); @@ -795,7 +859,7 @@ int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); - bool IsUnscaled = TII->isUnscaledLdSt(Opc); + bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc); int OffsetStride = IsUnscaled ? 
TII->getMemScale(*I) : 1; bool MergeForward = Flags.getMergeForward(); @@ -876,7 +940,7 @@ int Offset = getLdStOffsetOp(*I).getImm(); int PairedOffset = getLdStOffsetOp(*Paired).getImm(); - bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode()); + bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode()); if (IsUnscaled != PairedIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. If // I is scaled then scale the offset of Paired accordingly. Otherwise, do @@ -894,8 +958,11 @@ } // Which register is Rt and which is Rt2 depends on the offset order. + // However, for pre load/stores the Rt should be the one of the pre + // load/store. MachineInstr *RtMI, *Rt2MI; - if (Offset == PairedOffset + OffsetStride) { + if (Offset == PairedOffset + OffsetStride && + !AArch64InstrInfo::isPreLdSt(*I)) { RtMI = &*Paired; Rt2MI = &*I; // Here we swapped the assumption made for SExtIdx. @@ -909,7 +976,7 @@ } int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); // Scale the immediate offset, if necessary. - if (TII->isUnscaledLdSt(RtMI->getOpcode())) { + if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) { assert(!(OffsetImm % TII->getMemScale(*RtMI)) && "Unscaled offset cannot be scaled."); OffsetImm /= TII->getMemScale(*RtMI); @@ -940,13 +1007,20 @@ MI.clearRegisterKills(Reg, TRI); } } - MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc))) - .add(RegOp0) - .add(RegOp1) - .add(BaseRegOp) - .addImm(OffsetImm) - .cloneMergedMemRefs({&*I, &*Paired}) - .setMIFlags(I->mergeFlagsWith(*Paired)); + + unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc); + MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode)); + + // Adds the pre-index operand for pre-indexed ld/st pairs. 
+ if (AArch64InstrInfo::isPreLdSt(*RtMI)) + MIB.addReg(BaseRegOp.getReg(), RegState::Define); + + MIB.add(RegOp0) + .add(RegOp1) + .add(BaseRegOp) + .addImm(OffsetImm) + .cloneMergedMemRefs({&*I, &*Paired}) + .setMIFlags(I->mergeFlagsWith(*Paired)); (void)MIB; @@ -1054,8 +1128,8 @@ // performance and correctness are verified only in little-endian. if (!Subtarget->isLittleEndian()) return NextI; - bool IsUnscaled = TII->isUnscaledLdSt(*LoadI); - assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) && + bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI); + assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) && "Unsupported ld/st match"); assert(LoadSize <= StoreSize && "Invalid load size"); int UnscaledLdOffset = IsUnscaled @@ -1232,9 +1306,9 @@ unsigned OpcA = FirstMI.getOpcode(); unsigned OpcB = MI.getOpcode(); - // Opcodes match: nothing more to check. + // Opcodes match: candidate pair, unless both are pre-indexed ld/st. if (OpcA == OpcB) - return true; + return !AArch64InstrInfo::isPreLdSt(FirstMI); // Try to match a sign-extended load/store with a zero-extended load/store. bool IsValidLdStrOpc, PairIsValidLdStrOpc; @@ -1257,8 +1331,14 @@ if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) return false; + // The STRpre - STRui and + // LDRpre-LDRui + // are candidate pairs that can be merged. + if (isPreLdStPairCandidate(FirstMI, MI)) + return true; + // Try to match an unscaled load/store with a scaled load/store. - return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) && + return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) && getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB); // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? 
@@ -1448,7 +1528,7 @@ MBBI = next_nodbg(MBBI, E); bool MayLoad = FirstMI.mayLoad(); - bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); + bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI); Register Reg = getLdStRegOp(FirstMI).getReg(); Register BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); @@ -1496,7 +1576,7 @@ // a relocation. Register MIBaseReg = getLdStBaseOp(MI).getReg(); int MIOffset = getLdStOffsetOp(MI).getImm(); - bool MIIsUnscaled = TII->isUnscaledLdSt(MI); + bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI); if (IsUnscaled != MIIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. // If FirstMI is scaled then scale the offset of MI accordingly. @@ -1517,8 +1597,35 @@ } } - if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || - (Offset + OffsetStride == MIOffset))) { + bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI); + + if (BaseReg == MIBaseReg) { + // If the offset of the second ld/st is not equal to the size of the + // destination register it can't be paired with a pre-index ld/st + // pair. Additionally if the base reg is used or modified the operations + // can't be paired: bail and keep looking. + if (IsPreLdSt) { + bool IsOutOfBounds = MIOffset != TII->getMemScale(MI); + bool IsBaseRegUsed = + !UsedRegUnits.available(getLdStBaseOp(MI).getReg()); + bool IsBaseRegModified = + !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg()); + if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); + MemInsns.push_back(&MI); + continue; + } + } else { + if ((Offset != MIOffset + OffsetStride) && + (Offset + OffsetStride != MIOffset)) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, + UsedRegUnits, TRI); + MemInsns.push_back(&MI); + continue; + } + } + int MinOffset = Offset < MIOffset ? 
Offset : MIOffset; if (FindNarrowMerge) { // If the alignment requirements of the scaled wide load/store @@ -1985,7 +2092,7 @@ // Early exit if the offset is not possible to match. (6 bits of positive // range, plus allow an extra one in case we find a later insn that matches // with Offset-1) - bool IsUnscaled = TII->isUnscaledLdSt(MI); + bool IsUnscaled = TII->hasUnscaledLdStOffset(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1; // Allow one more for offset. @@ -2000,7 +2107,7 @@ findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false); if (Paired != E) { ++NumPairCreated; - if (TII->isUnscaledLdSt(MI)) + if (TII->hasUnscaledLdStOffset(MI)) ++NumUnscaledPairCreated; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. @@ -2035,7 +2142,7 @@ } // Don't know how to handle unscaled pre/post-index versions below, so bail. - if (TII->isUnscaledLdSt(MI.getOpcode())) + if (TII->hasUnscaledLdStOffset(MI.getOpcode())) return false; // Look back to try to find a pre-index instruction. For example, diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -65,8 +65,8 @@ define void @bzero_8_stack() { ; CHECK-LABEL: bzero_8_stack: -; CHECK: str xzr, [sp, #8] -; CHECK-NEXT: bl something +; CHECK: stp x30, xzr, [sp, #-16]! +; CHECK: bl something %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 8, i1 false) @@ -232,8 +232,8 @@ define void @memset_8_stack() { ; CHECK-LABEL: memset_8_stack: ; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: add x0, sp, #8 -; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: stp x30, x8, [sp, #-16]! 
+; CHECK-NEXT: add x0, sp, #8 // =8 ; CHECK-NEXT: bl something %buf = alloca [8 x i8], align 1 %cast = bitcast [8 x i8]* %buf to i8* diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir @@ -0,0 +1,587 @@ +# RUN: llc -o - %s -mtriple=aarch64-none-eabi -mcpu=cortex-a55 -lsr-preferred-addressing-mode=preindexed -stop-after=aarch64-ldst-opt | FileCheck %s + +--- +name: 1-ldrwpre-ldrwui-merge +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$w0' } + - { reg: '$w1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $w0, $w1, $x1 + ; CHECK-LABEL: name: 1-ldrwpre-ldrwui-merge + ; CHECK: liveins: $w0, $w1, $x1 + ; CHECK: early-clobber $x1, renamable $w0, renamable $w1 = LDPWpre renamable $x1, 5 :: (load 4) + ; CHECK: STPWi renamable $w0, renamable $w1, renamable $x1, 0 :: (store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load 4) + renamable $w1 = LDRWui renamable $x1, 1 :: (load 4) + STRWui killed renamable $w0, renamable $x1, 0 :: (store 4) + STRWui killed renamable $w1, renamable $x1, 1 :: (store 4) + RET undef $lr +... 
+ + +--- +name: 2-ldrxpre-ldrxui-merge +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $x2, $x3, $x1 + ; CHECK-LABEL: name: 2-ldrxpre-ldrxui-merge + ; CHECK: liveins: $x1, $x2, $x3 + ; CHECK: early-clobber $x1, renamable $x2, renamable $x3 = LDPXpre renamable $x1, 3 :: (load 8) + ; CHECK: STPXi renamable $x2, renamable $x3, renamable $x1, 0 :: (store 8) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $x2 = LDRXpre killed renamable $x1, 24 :: (load 8) + renamable $x3 = LDRXui renamable $x1, 1 :: (load 8) + STRXui killed renamable $x2, renamable $x1, 0 :: (store 8) + STRXui killed renamable $x3, renamable $x1, 1 :: (store 8) + RET undef $lr +... + + +--- +name: 3-ldrspre-ldrsui-merge +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x1 + ; CHECK-LABEL: name: 3-ldrspre-ldrsui-merge + ; CHECK: liveins: $s0, $s1, $x1 + ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load 4) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) + renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +... 
+ + +--- +name: 4-ldrdpre-ldrdui-merge +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$d0' } + - { reg: '$d1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $d0, $d1, $x1 + ; CHECK-LABEL: name: 4-ldrdpre-ldrdui-merge + ; CHECK: liveins: $d0, $d1, $x1 + ; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load 8) + ; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store 8) + ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 8) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load 8) + renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) + STRDui killed renamable $d0, renamable $x1, 0 :: (store 8) + STRDui killed renamable $d1, renamable $x1, 1 :: (store 8) + RET undef $lr +... + + +--- +name: 5-ldrqpre-ldrqui-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 5-ldrqpre-ldrqui-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 3 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) + renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... 
+ + +--- +name: 6-ldrqui-ldrqpre-no-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 6-ldrqui-ldrqpre-no-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... + + +--- +name: 7-ldrqpre-ldrqui-max-offset-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 7-ldrqpre-ldrqui-max-offset-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 15 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 240 :: (load 16) + renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... 
+ + +--- +name: 8-ldrqpre-ldrqui-min-offset-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 8-ldrqpre-ldrqui-min-offset-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, -16 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, -256 :: (load 16) + renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... + + +--- +name: 9-ldrspre-ldrsui-mod-base-reg-no-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x0, $x1 + ; CHECK-LABEL: name: 9-ldrspre-ldrsui-mod-base-reg-no-merge + ; CHECK: liveins: $s0, $s1, $x0, $x1 + ; CHECK: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) + ; CHECK: renamable $x1 = LDRXui renamable $x0, 1 :: (load 8) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + ; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) + renamable $x1 = LDRXui renamable $x0, 1 :: (load 8) + renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +... 
+ + +--- +name: 10-ldrspre-ldrsui-used-base-reg-no-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x0, $x1 + ; CHECK-LABEL: name: 10-ldrspre-ldrsui-used-base-reg-no-merge + ; CHECK: liveins: $s0, $s1, $x0, $x1 + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) + ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) + ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store 8) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) + renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) + STRXui killed renamable $x0, renamable $x0, 1 :: (store 8) + renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +... 
+ + +--- +name: 11-ldrqpre-ldrqpre-no-merge +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 11-ldrqpre-ldrqpre-no-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load 16) + ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load 16) + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) + ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load 16) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) + ; CHECK: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 1 :: (load 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load 16) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 12 :: (load 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load 16) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 16 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... 
+ + +--- +name: 12-ldrspre-ldrsui-no-merge +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x1 + + ; The offset of the second load is not equal to the + ; size of the destination register, and hence can't be merged. + + ; CHECK-LABEL: name: 12-ldrspre-ldrsui-no-merge + ; CHECK: liveins: $s0, $s1, $x1 + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load 4) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) + renamable $s1 = LDRSui renamable $x1, 2 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +... 
+ + +--- +name: 13-ldrqpre-ldrdui-no-merge # LDRQpre (load 16) followed by LDRDui (load 8); expected output leaves both loads and both stores unmerged. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$d1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $d1, $x1 + ; CHECK-LABEL: name: 13-ldrqpre-ldrdui-no-merge + ; CHECK: liveins: $d1, $q0, $x1 + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) + ; CHECK: renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) + ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: (store 8) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRDui killed renamable $d1, renamable $x1, 1 :: (store 8) + RET undef $lr +... + + +--- +name: 14-ldrqpre-strqui-no-merge # A pre-indexed load followed by a store rather than a second load; expected output keeps both instructions. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 14-ldrqpre-strqui-no-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + RET undef $lr +...
+ + +--- +name: 15-ldrqpre-ldrqui-same-dst-reg-no-merge # Both loads write $q0; expected output keeps LDRQpre and LDRQui separate and marks the first $q0 definition dead. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 15-ldrqpre-ldrqui-same-dst-reg-no-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) + ; CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + RET undef $lr +... + + +--- +name: 16-ldrqpre-ldrqui-diff-base-reg-no-merge # Second load uses base $x2 instead of $x1; expected output keeps the loads separate while the two STRQui pair into STPQi. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1, $x2 + ; CHECK-LABEL: name: 16-ldrqpre-ldrqui-diff-base-reg-no-merge + ; CHECK: liveins: $q0, $q1, $x1, $x2 + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) + ; CHECK: renamable $q1 = LDRQui renamable $x2, 1 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + renamable $q1 = LDRQui renamable $x2, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +...
+ + +--- +name: 17-ldrqpre-ldurqi-merge # LDRQpre 32 followed by LDURQi with byte offset 16; expected output is a single LDPQpre with immediate 2. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 17-ldrqpre-ldurqi-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 2 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + renamable $q1 = LDURQi renamable $x1, 16 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +... + + +--- +name: 18-ldrqpre-ldurqi-no-merge # LDURQi byte offset is 1 rather than 16; expected output keeps the loads separate while the two STRQui pair into STPQi. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $q0, $q1, $x1 + ; CHECK-LABEL: name: 18-ldrqpre-ldurqi-no-merge + ; CHECK: liveins: $q0, $q1, $x1 + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) + ; CHECK: renamable $q1 = LDURQi renamable $x1, 1 :: (load 16) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) + renamable $q1 = LDURQi renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + RET undef $lr +...
+ + +--- +name: 19-ldrspre-ldrsui-max-merge # Pre-index immediate 252 with LDRSui offset 1; expected output is LDPSpre with immediate 63. +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x1 + ; CHECK-LABEL: name: 19-ldrspre-ldrsui-max-merge + ; CHECK: liveins: $s0, $s1, $x1 + ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load 4) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load 4) + renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +... + + +--- +name: 20-ldrspre-ldrsui-unaligned-no-merge # Pre-index immediate 251 is not a multiple of 4; expected output keeps LDRSpre and LDRSui separate. +tracksRegLiveness: true +liveins: + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $s0, $s1, $x1 + ; CHECK-LABEL: name: 20-ldrspre-ldrsui-unaligned-no-merge + ; CHECK: liveins: $s0, $s1, $x1 + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load 4) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: RET undef $lr + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load 4) + renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) + STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) + STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + RET undef $lr +...
diff --git a/llvm/test/CodeGen/AArch64/strpre-str-merge.mir b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir @@ -0,0 +1,426 @@ +# RUN: llc -o - %s -mtriple=aarch64-none-eabi -mcpu=cortex-a55 -lsr-preferred-addressing-mode=preindexed -stop-after=aarch64-ldst-opt | FileCheck %s + +--- +name: 1-strwpre-strwui-merge # STRWpre 20 followed by STRWui offset 1; expected output is STPWpre with immediate 5. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $w1, $w2, $x0 + ; CHECK-LABEL: name: 1-strwpre-strwui-merge + ; CHECK: liveins: $w1, $w2, $x0 + ; CHECK: early-clobber $x0 = STPWpre renamable $w1, renamable $w2, renamable $x0, 5 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRWpre killed renamable $w1, killed renamable $x0, 20 :: (store 4) + STRWui killed renamable $w2, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 + +... + + +--- +name: 2-strxpre-strxui-merge # STRXpre 24 followed by STRXui offset 1; expected output is STPXpre with immediate 3. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: 2-strxpre-strxui-merge + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: early-clobber $x0 = STPXpre renamable $x1, renamable $x2, renamable $x0, 3 :: (store 8) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store 8) + STRXui killed renamable $x2, renamable $x0, 1 :: (store 8) + RET undef $lr, implicit $x0 + +...
+ + +--- +name: 3-strspre-strsui-merge # STRSpre 12 followed by STRSui offset 1; expected output is STPSpre with immediate 3. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 3-strspre-strsui-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 3 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +... + + +--- +name: 4-strdpre-strdui-merge # STRDpre 128 followed by STRDui offset 1; expected output is STPDpre with immediate 16. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$d0' } + - { reg: '$d1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $d0, $d1, $x0 + + ; CHECK-LABEL: name: 4-strdpre-strdui-merge + ; CHECK: liveins: $d0, $d1, $x0 + ; CHECK: early-clobber $x0 = STPDpre renamable $d0, renamable $d1, renamable $x0, 16 :: (store 8) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRDpre killed renamable $d0, killed renamable $x0, 128 :: (store 8) + STRDui killed renamable $d1, renamable $x0, 1 :: (store 8) + RET undef $lr, implicit $x0 + +...
+ + +--- +name: 5-strqpre-strqui-merge # STRQpre 48 followed by STRQui offset 1; expected output is STPQpre with immediate 3. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $q0, $q1, $x0 + + ; CHECK-LABEL: name: 5-strqpre-strqui-merge + ; CHECK: liveins: $q0, $q1, $x0 + ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store 16) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) + STRQui killed renamable $q1, renamable $x0, 1 :: (store 16) + RET undef $lr, implicit $x0 + +... + + +--- +name: 6-strqui-strqpre-no-merge # STRQui comes before STRQpre; expected output keeps both stores in their original order. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: 6-strqui-strqpre-no-merge + ; CHECK: liveins: $q0, $q1, $x0 + ; CHECK: STRQui renamable $q1, renamable $x0, 1 :: (store 16) + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store 16) + ; CHECK: RET undef $lr, implicit $x0 + STRQui killed renamable $q1, renamable $x0, 1 :: (store 16) + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) + RET undef $lr, implicit $x0 +...
+ + +--- +name: 7-strspre-strsui-max-offset-merge # Pre-index immediate 252 with STRSui offset 1; expected output is STPSpre with immediate 63. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 7-strspre-strsui-max-offset-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 63 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 252 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +... + + +--- +name: 8-strspre-strsui-min-offset-merge # Pre-index immediate -256 with STRSui offset 1; expected output is STPSpre with immediate -64. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 8-strspre-strsui-min-offset-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, -64 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, -256 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +...
+ + +--- +name: 9-strspre-strsui-mod-base-reg-no-merge # $x0 is redefined by LDRXui between the two stores; expected output keeps all three instructions. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0, $x1 + ; CHECK-LABEL: name: 9-strspre-strsui-mod-base-reg-no-merge + ; CHECK: liveins: $s0, $s1, $x0, $x1 + ; CHECK: dead early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) + ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) + renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +... + + +--- +name: 10-strspre-strsui-used-base-reg-no-merge # A STRXui on $x1 sits between the two stores to $x0; expected output keeps all three instructions. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0, $x1 + ; CHECK-LABEL: name: 10-strspre-strsui-used-base-reg-no-merge + ; CHECK: liveins: $s0, $s1, $x0, $x1 + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) + ; CHECK: STRXui renamable $x1, renamable $x1, 1 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) + + STRXui killed renamable $x1, renamable $x1, 1 :: (store 4) + + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +...
+ + +--- +name: 11-strspre-strspre-no-merge # Six consecutive STRSpre on $x0; expected output keeps every store unpaired. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 11-strspre-strspre-no-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 16, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 12, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 4, implicit $w0 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 12 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 16 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 12 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 4 :: (store 4) + RET undef $lr, implicit $x0 +...
+ + +--- +name: 12-strspre-strsui-no-merge # STRSpre followed by STRSui with immediate 2; expected output keeps the two stores separate. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + + ; The offset of the second store (STRSui immediate 2) is not equal to the + ; size of the stored register, and hence the stores can't be merged. + + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 12-strspre-strsui-no-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x0, 2 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 2 :: (store 4) + RET undef $lr, implicit $x0 +... + + +--- +name: 13-strqpre-sturqi-merge # STRQpre 48 followed by STURQi with byte offset 16; expected output is STPQpre with immediate 3. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $q0, $q1, $x0 + + ; CHECK-LABEL: name: 13-strqpre-sturqi-merge + ; CHECK: liveins: $q0, $q1, $x0 + ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store 16) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) + STURQi killed renamable $q1, renamable $x0, 16 :: (store 16) + RET undef $lr, implicit $x0 + +...
+ + +--- +name: 14-strqpre-sturqi-no-merge # STURQi byte offset is 1 rather than 16; expected output keeps STRQpre and STURQi separate. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $q0, $q1, $x0 + ; CHECK-LABEL: name: 14-strqpre-sturqi-no-merge + ; CHECK: liveins: $q0, $q1, $x0 + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store 16) + ; CHECK: STURQi renamable $q1, renamable $x0, 1 :: (store 16) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) + STURQi killed renamable $q1, renamable $x0, 1 :: (store 16) + RET undef $lr, implicit $x0 +... + + +--- +name: 15-strspre-strsui-unaligned-no-merge # Pre-index immediate 251 is not a multiple of 4; expected output keeps STRSpre and STRSui separate. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$s0' } + - { reg: '$s1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $s0, $s1, $x0 + ; CHECK-LABEL: name: 15-strspre-strsui-unaligned-no-merge + ; CHECK: liveins: $s0, $s1, $x0 + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 251, implicit $w0 :: (store 4) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: RET undef $lr, implicit $x0 + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 251 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + RET undef $lr, implicit $x0 +...