Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp
@@ -1490,8 +1490,20 @@
         : SU(su), BaseOp(Op), Offset(ofs) {}
 
     bool operator<(const MemOpInfo &RHS) const {
-      return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
-             std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, RHS.SU->NodeNum);
+      if (BaseOp->getType() != RHS.BaseOp->getType())
+        return BaseOp->getType() < RHS.BaseOp->getType();
+
+      if (BaseOp->isReg())
+        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+                               RHS.SU->NodeNum);
+      if (BaseOp->isFI())
+        return std::make_tuple(BaseOp->getIndex(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getIndex(), RHS.Offset,
+                               RHS.SU->NodeNum);
+
+      llvm_unreachable("MemOpClusterMutation only supports register or frame "
+                       "index bases.");
     }
   };
 
Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1146,9 +1146,9 @@
       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     return false;
 
-  // Retrieve the base register, offset from the base and width. Width
+  // Retrieve the base, offset from the base and width. Width
   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
-  // base registers are identical, and the offset of a lower memory access +
+  // the bases are identical, and the offset of a lower memory access +
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
@@ -2035,8 +2035,9 @@
   if (MI.hasOrderedMemoryRef())
     return false;
 
-  // Make sure this is a reg+imm (as opposed to an address reloc).
-  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+         "Expected a reg or frame index operand.");
 
   if (!MI.getOperand(2).isImm())
     return false;
@@ -2086,11 +2087,13 @@
   // Handle only loads/stores with base register followed by immediate offset.
   if (LdSt.getNumExplicitOperands() == 3) {
     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+        !LdSt.getOperand(2).isImm())
      return false;
   } else if (LdSt.getNumExplicitOperands() == 4) {
     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+    if (!LdSt.getOperand(1).isReg() ||
+        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
         !LdSt.getOperand(3).isImm())
       return false;
   } else
@@ -2117,9 +2120,9 @@
     Offset = LdSt.getOperand(3).getImm() * Scale;
   }
 
-  assert(
-      BaseOp->isReg() &&
-      "getMemOperandWithOffset only supports base operands of type register.");
+  assert((BaseOp->isReg() || BaseOp->isFI()) &&
+         "getMemOperandWithOffset only supports base "
+         "operands of type register or frame index.");
 
   return true;
 }
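The scheduler mutation above now sorts its MemOpInfo records by base-operand kind before comparing the kind-specific fields, so a register base is never compared field-by-field against a frame-index base, and getMemOperandWithOffset may now report a frame-index base operand. Below is a minimal standalone sketch of that ordering; BaseKind, MockMemOp, and the sample values are invented for illustration and are not LLVM types:

    #include <algorithm>
    #include <cassert>
    #include <tuple>
    #include <vector>

    // Simplified stand-in for the two base-operand kinds the mutation accepts.
    enum class BaseKind { Register, FrameIndex };

    struct MockMemOp {
      BaseKind Kind;    // register base or frame-index base
      int Base;         // register number or frame-index number
      long Offset;      // immediate offset from the base
      unsigned NodeNum; // tie-breaker, like SU->NodeNum

      bool operator<(const MockMemOp &RHS) const {
        // Group by base kind first, exactly so mixed-kind comparisons never
        // reach the register/frame-index specific fields.
        if (Kind != RHS.Kind)
          return Kind < RHS.Kind;
        return std::make_tuple(Base, Offset, NodeNum) <
               std::make_tuple(RHS.Base, RHS.Offset, RHS.NodeNum);
      }
    };

    int main() {
      std::vector<MockMemOp> Ops = {
          {BaseKind::FrameIndex, /*FI=*/0, /*Offset=*/8, /*Node=*/2},
          {BaseKind::Register, /*Reg=*/1, /*Offset=*/0, /*Node=*/3},
          {BaseKind::FrameIndex, /*FI=*/0, /*Offset=*/0, /*Node=*/1},
      };
      std::sort(Ops.begin(), Ops.end());
      // The two frame-index accesses end up adjacent and ordered by offset,
      // so the cluster mutation can consider them as a pair.
      assert(Ops[1].Kind == BaseKind::FrameIndex &&
             Ops[2].Kind == BaseKind::FrameIndex);
      assert(Ops[1].Offset <= Ops[2].Offset);
      return 0;
    }

In the real comparator the kind is MachineOperand::getType() and the fields come from getReg()/getIndex(), as in the MachineScheduler.cpp hunk above.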
@@ -2275,31 +2278,33 @@
   return true;
 }
 
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = 1;
+static unsigned getOffsetStride(unsigned Opc) {
   switch (Opc) {
   default:
-    return false;
+    return 0;
   case AArch64::LDURQi:
   case AArch64::STURQi:
-    OffsetStride = 16;
-    break;
+    return 16;
   case AArch64::LDURXi:
   case AArch64::LDURDi:
   case AArch64::STURXi:
   case AArch64::STURDi:
-    OffsetStride = 8;
-    break;
+    return 8;
   case AArch64::LDURWi:
   case AArch64::LDURSi:
   case AArch64::LDURSWi:
   case AArch64::STURWi:
   case AArch64::STURSi:
-    OffsetStride = 4;
-    break;
+    return 4;
   }
+}
+
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+  unsigned OffsetStride = getOffsetStride(Opc);
+  if (OffsetStride == 0)
+    return false;
+
   // If the byte-offset isn't a multiple of the stride, we can't scale this
   // offset.
   if (Offset % OffsetStride != 0)
@@ -2311,6 +2316,19 @@
   return true;
 }
 
+// Unscale the scaled offsets. Returns false if the scaled offset can't be
+// unscaled.
+static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
+  unsigned OffsetStride = getOffsetStride(Opc);
+  if (OffsetStride == 0)
+    return false;
+
+  // Convert the "element" offset used by scaled pair load/store instructions
+  // into the byte-offset used by unscaled.
+  Offset *= OffsetStride;
+  return true;
+}
+
 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   if (FirstOpc == SecondOpc)
     return true;
@@ -2329,6 +2347,30 @@
   return false;
 }
 
+static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+                            int64_t Offset1, unsigned Opcode1, int FI2,
+                            int64_t Offset2, unsigned Opcode2) {
+  // Accesses through fixed stack object frame indices may access a different
+  // fixed stack slot. Check that the object offsets + offsets match.
+  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+    assert(ObjectOffset1 >= ObjectOffset2 && "Object offsets are not ordered.");
+    // Get the byte-offset from the object offset.
+    if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+      return false;
+    ObjectOffset1 += Offset1;
+    ObjectOffset2 += Offset2;
+    // Get the "element" index in the object.
+    if (!scaleOffset(Opcode1, ObjectOffset1) ||
+        !scaleOffset(Opcode2, ObjectOffset2))
+      return false;
+    return ObjectOffset2 + 1 == ObjectOffset1;
+  }
+
+  return FI1 == FI2;
+}
+
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
@@ -2340,9 +2382,11 @@
   if (BaseOp1.getType() != BaseOp2.getType())
     return false;
 
-  assert(BaseOp1.isReg() && "Only base registers are supported.");
+  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+         "Only base registers and frame indices are "
+         "supported.");
 
-  // Check for base regs.
+  // Check for both base regs and base FI.
   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
     return false;
 
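shouldClusterFI above treats fixed stack objects specially: the instruction offsets are converted back to bytes, combined with the objects' byte offsets, rescaled to element units, and only adjacent elements are clustered. The sketch below walks the same arithmetic with hard-coded numbers standing in for MachineFrameInfo; the stride of 8 matches the STURXi entry in getOffsetStride, and the object offsets are assumptions chosen for the example:

    #include <cassert>
    #include <cstdint>

    // Byte stride of the hypothetical unscaled 64-bit stores in this example
    // (getOffsetStride above returns 8 for STURXi/LDURXi).
    constexpr int64_t Stride = 8;

    // Mirror of the fixed-object path in shouldClusterFI, with MachineFrameInfo
    // reduced to two hard-coded object byte offsets.
    bool shouldClusterFixedObjects(int64_t ObjectOffset1, int64_t Offset1,
                                   int64_t ObjectOffset2, int64_t Offset2) {
      assert(ObjectOffset1 >= ObjectOffset2 && "Object offsets are not ordered.");
      // unscaleOffset: convert the "element" offsets back to bytes.
      Offset1 *= Stride;
      Offset2 *= Stride;
      // Combine with the byte offsets of the fixed stack objects.
      ObjectOffset1 += Offset1;
      ObjectOffset2 += Offset2;
      // scaleOffset: reject combined offsets that aren't stride-aligned, then
      // convert to element indices.
      if (ObjectOffset1 % Stride != 0 || ObjectOffset2 % Stride != 0)
        return false;
      // Cluster only if the two elements are adjacent.
      return ObjectOffset2 / Stride + 1 == ObjectOffset1 / Stride;
    }

    int main() {
      // Two 8-byte stores to fixed objects at byte offsets -8 and -16, each
      // with an instruction offset of 0 elements: adjacent slots, so
      // clustering is allowed.
      assert(shouldClusterFixedObjects(-8, 0, -16, 0));
      // Objects 24 bytes apart are not adjacent and are left alone.
      assert(!shouldClusterFixedObjects(-8, 0, -32, 0));
      return 0;
    }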
@@ -2379,7 +2423,17 @@
     return false;
 
   // The caller should already have ordered First/SecondLdSt by offset.
-  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+  // Note: except for non-equal frame index bases
+  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+         "Caller should have ordered offsets.");
+
+  if (BaseOp1.isFI()) {
+    const MachineFrameInfo &MFI =
+        FirstLdSt.getParent()->getParent()->getFrameInfo();
+    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+                           BaseOp2.getIndex(), Offset2, SecondOpc);
+  }
+
   return Offset1 + 1 == Offset2;
 }
Index: llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -259,9 +259,9 @@
 define void @memset_16_stack() {
 ; CHECK-LABEL: memset_16_stack:
 ; CHECK: mov x8, #-6148914691236517206
+; CHECK-NEXT: str x8, [sp, #-32]!
 ; CHECK-NEXT: mov x0, sp
 ; CHECK-NEXT: stp x8, x30, [sp, #8]
-; CHECK-NEXT: str x8, [sp]
 ; CHECK-NEXT: bl something
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*
Index: llvm/trunk/test/CodeGen/AArch64/cluster-frame-index.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/cluster-frame-index.mir
+++ llvm/trunk/test/CodeGen/AArch64/cluster-frame-index.mir
@@ -0,0 +1,27 @@
+#RUN: llc -mtriple=aarch64-- -mcpu=cyclone -run-pass machine-scheduler -o - %s | FileCheck %s
+...
+---
+name: merge_stack
+# CHECK-LABEL: name: merge_stack
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 64, alignment: 8 }
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = COPY $w1
+    undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0
+    STRXui %3, %stack.0, 0 :: (store 8)
+    undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0
+    STRXui %5, %stack.0, 1 :: (store 8)
+    RET_ReallyLR
+
+    ; CHECK: COPY
+    ; CHECK-NEXT: COPY
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: RET
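In the new MIR test both stores go through the ordinary (non-fixed) stack object %stack.0, so shouldClusterFI falls through to the plain FI1 == FI2 comparison, while register bases keep the old adjacent-offset rule. Below is a small standalone sketch of that final decision, with an invented MockBase type and made-up indices, assuming the earlier opcode and operand checks in shouldClusterMemOps all passed:

    #include <cassert>
    #include <cstdint>

    // Invented stand-in for what the target hook sees after
    // getMemOperandWithOffset: either a register base or a frame-index base,
    // plus an offset in element units.
    struct MockBase {
      bool IsFI;      // frame-index base rather than a register base
      int RegOrFI;    // register number or frame-index number
      int64_t Offset; // "element" offset (already scaled)
    };

    // Sketch of the tail of shouldClusterMemOps after this patch, restricted
    // to ordinary (non-fixed) stack objects.
    bool wouldCluster(const MockBase &First, const MockBase &Second) {
      if (First.IsFI != Second.IsFI)
        return false; // mixed base kinds are rejected early
      if (First.IsFI)
        return First.RegOrFI == Second.RegOrFI; // shouldClusterFI, non-fixed path
      // Register bases keep the old rule: same base, adjacent element offsets.
      return First.RegOrFI == Second.RegOrFI && First.Offset + 1 == Second.Offset;
    }

    int main() {
      // Mirrors cluster-frame-index.mir: two STRXui to %stack.0 at element
      // offsets 0 and 1 are clustering candidates, so the machine scheduler
      // keeps them back-to-back (see the CHECK-NEXT lines above).
      assert(wouldCluster({true, 0, 0}, {true, 0, 1}));
      // A store through a different stack object is not a candidate.
      assert(!wouldCluster({true, 0, 0}, {true, 1, 0}));
      // Register-based accesses still require adjacent offsets.
      assert(wouldCluster({false, 1, 4}, {false, 1, 5}));
      assert(!wouldCluster({false, 1, 4}, {false, 1, 6}));
      return 0;
    }

Clustering only asks the scheduler to keep the two stores adjacent; actually fusing them into an stp is left to the later AArch64 load/store optimizer.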