Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -1489,9 +1489,23 @@
     MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
         : SU(su), BaseOp(Op), Offset(ofs) {}
 
+    /// Compare by base operand type first; operands of the same type compare
+    /// by (base register or frame index, Offset, SU->NodeNum).
     bool operator<(const MemOpInfo &RHS) const {
-      return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
-             std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, RHS.SU->NodeNum);
+      if (BaseOp->getType() != RHS.BaseOp->getType())
+        return BaseOp->getType() < RHS.BaseOp->getType();
+
+      if (BaseOp->isReg())
+        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+                               RHS.SU->NodeNum);
+      if (BaseOp->isFI())
+        return std::make_tuple(BaseOp->getIndex(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getIndex(), RHS.Offset,
+                               RHS.SU->NodeNum);
+
+      llvm_unreachable("MemOpClusterMutation only supports register or frame "
+                       "index bases.");
     }
   };
 
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1146,15 +1146,17 @@
       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     return false;
 
-  // Retrieve the base register, offset from the base and width. Width
+  // Retrieve the base, offset from the base and width. Width
   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
-  // base registers are identical, and the offset of a lower memory access +
+  // bases are identical, and the offset of a lower memory access +
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
   if (getMemOpBaseImmOfsWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
       getMemOpBaseImmOfsWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
     if ((BaseOpA->isReg() && BaseOpB->isReg() &&
-         BaseOpA->getReg() == BaseOpB->getReg())) {
+         BaseOpA->getReg() == BaseOpB->getReg()) ||
+        (BaseOpA->isFI() && BaseOpB->isFI() &&
+         BaseOpA->getIndex() == BaseOpB->getIndex())) {
       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -2163,7 +2165,8 @@
     return false;
 
   // Make sure this is a reg+imm (as opposed to an address reloc).
-  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+         "Expected a reg or frame index operand.");
   if (!MI.getOperand(2).isImm())
     return false;
 
@@ -2213,11 +2216,13 @@
   // Handle only loads/stores with base register followed by immediate offset.
   if (LdSt.getNumExplicitOperands() == 3) {
     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+        !LdSt.getOperand(2).isImm())
       return false;
   } else if (LdSt.getNumExplicitOperands() == 4) {
     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+    if (!LdSt.getOperand(1).isReg() ||
+        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
         !LdSt.getOperand(3).isImm())
       return false;
   } else
@@ -2462,11 +2467,14 @@
   if (BaseOp1.getType() != BaseOp2.getType())
     return false;
 
-  assert(BaseOp1.isReg() && "Only base registers are supported.");
+  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+         "Only base registers and frame indices are supported.");
 
-  // Check for base regs.
+  // Check for both base regs and base FI.
   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
     return false;
+  if (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())
+    return false;
 
   // Only cluster up to a single pair.
   if (NumLoads > 1)
Index: test/CodeGen/AArch64/arm64-memset-inline.ll
===================================================================
--- test/CodeGen/AArch64/arm64-memset-inline.ll
+++ test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -259,9 +259,9 @@
 define void @memset_16_stack() {
 ; CHECK-LABEL: memset_16_stack:
 ; CHECK: mov x8, #-6148914691236517206
+; CHECK-NEXT: str x8, [sp, #-32]!
 ; CHECK-NEXT: mov x0, sp
 ; CHECK-NEXT: stp x8, x30, [sp, #8]
-; CHECK-NEXT: str x8, [sp]
 ; CHECK-NEXT: bl something
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*
Index: test/CodeGen/AArch64/cluster-frame-index.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/cluster-frame-index.mir
@@ -0,0 +1,27 @@
+#RUN: llc -mtriple=aarch64-- -mcpu=cyclone -run-pass machine-scheduler -o - %s | FileCheck %s
+...
+---
+name: merge_stack
+# CHECK-LABEL: name: merge_stack
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 64, alignment: 8 }
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = COPY $w1
+    undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0
+    STRXui %3, %stack.0, 0 :: (store 8)
+    undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0
+    STRXui %5, %stack.0, 1 :: (store 8)
+    RET_ReallyLR
+
+    ; CHECK: COPY
+    ; CHECK-NEXT: COPY
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: RET