Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -63,6 +63,9 @@
   /// value is non-zero.
   bool hasExtendedReg(const MachineInstr &MI) const;
 
+  /// Return true if \p Offset is encodable in the immediate offset field of
+  /// the load/store instruction \p MI.
+  bool isEncodableLdStOffset(const MachineInstr &MI, int64_t Offset) const;
+
   /// \brief Does this instruction set its full destination register to zero?
   bool isGPRZero(const MachineInstr &MI) const;
 
@@ -133,6 +136,92 @@
                                    int64_t &Offset, unsigned &Width,
                                    const TargetRegisterInfo *TRI) const;
 
+  /// Return the immediate offset operand of the load/store \p LdSt.
+  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
+
+  /// \brief Return the scale applied to the immediate offset for an
+  /// instruction with opcode \p Opc, or -1 if the opcode has no
+  /// immediate-offset form.
+  static int getScale(unsigned Opc) {
+    switch (Opc) {
+    default:
+      return -1;
+
+    // Byte accesses and unscaled (LDUR/STUR) forms.
+    case AArch64::LDRBBui:
+    case AArch64::LDRBui:
+    case AArch64::LDURBBi:
+    case AArch64::LDURBi:
+    case AArch64::LDURDi:
+    case AArch64::LDURHHi:
+    case AArch64::LDURHi:
+    case AArch64::LDURQi:
+    case AArch64::LDURSBWi:
+    case AArch64::LDURSBXi:
+    case AArch64::LDURSHWi:
+    case AArch64::LDURSHXi:
+    case AArch64::LDURSWi:
+    case AArch64::LDURSi:
+    case AArch64::LDURWi:
+    case AArch64::LDURXi:
+    case AArch64::STRBBui:
+    case AArch64::STRBui:
+    case AArch64::STURBBi:
+    case AArch64::STURBi:
+    case AArch64::STURDi:
+    case AArch64::STURHHi:
+    case AArch64::STURHi:
+    case AArch64::STURQi:
+    case AArch64::STURSi:
+    case AArch64::STURWi:
+    case AArch64::STURXi:
+      return 1;
+
+    // Halfword accesses.
+    case AArch64::LDRHHui:
+    case AArch64::LDRHui:
+    case AArch64::LDRSHWui:
+    case AArch64::LDRSHXui:
+    case AArch64::STRHHui:
+    case AArch64::STRHui:
+      return 2;
+
+    // Word accesses.
+    case AArch64::LDNPSi:
+    case AArch64::LDNPWi:
+    case AArch64::LDPSi:
+    case AArch64::LDPWi:
+    case AArch64::LDRSWui:
+    case AArch64::LDRSui:
+    case AArch64::LDRWui:
+    case AArch64::STNPSi:
+    case AArch64::STNPWi:
+    case AArch64::STPSi:
+    case AArch64::STPWi:
+    case AArch64::STRSui:
+    case AArch64::STRWui:
+      return 4;
+
+    // Doubleword accesses.
+    case AArch64::LDNPDi:
+    case AArch64::LDNPXi:
+    case AArch64::LDPDi:
+    case AArch64::LDPXi:
+    case AArch64::LDRDui:
+    case AArch64::LDRXui:
+    case AArch64::PRFMui:
+    case AArch64::STNPDi:
+    case AArch64::STNPXi:
+    case AArch64::STPDi:
+    case AArch64::STPXi:
+    case AArch64::STRDui:
+    case AArch64::STRXui:
+      return 8;
+
+    // Quadword accesses.
+    case AArch64::LDNPQi:
+    case AArch64::LDPQi:
+    case AArch64::LDRQui:
+    case AArch64::STNPQi:
+    case AArch64::STPQi:
+    case AArch64::STRQui:
+      return 16;
+    }
+  }
+
   bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
                            unsigned NumLoads) const override;
 
@@ -242,7 +331,31 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+  unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
+                               bool CanBeTailCall) const override;
+  AArch64GenInstrInfo::MachineOutlinerInstrType
+  getOutliningType(MachineInstr &MI) const override;
+  void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF,
+                              bool IsTailCall) const override;
+  void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF,
+                              bool IsTailCall) const override;
+  MachineBasicBlock::iterator
+  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It, MachineFunction &MF,
+                     bool IsTailCall) const override;
+
 private:
+  /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+  /// so that they will be valid post-outlining.
+  ///
+  /// \param MBB A \p MachineBasicBlock in an outlined function.
+  void fixupPostOutline(MachineBasicBlock &MBB) const;
+
   void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                              MachineBasicBlock *TBB,
                              ArrayRef<MachineOperand> Cond) const;
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -1393,6 +1394,75 @@
   return false;
 }
 
+bool AArch64InstrInfo::isEncodableLdStOffset(const MachineInstr &MI,
+                                             int64_t Offset) const {
+  assert(MI.mayLoadOrStore() && "Expected a load/store!");
+
+  // Get the scale for the opcode so we can rescale the offset with it.
+  int Scale = getScale(MI.getOpcode());
+
+  // Does the instruction even have an immediate-offset form?
+  if (Scale == -1)
+    return false;
+
+  // An offset that isn't a multiple of the scale can't be encoded either.
+  if (Offset % Scale != 0)
+    return false;
+
+  Offset /= Scale;
+
+  // Check that the rescaled offset fits the instruction's immediate range.
+  switch (MI.getOpcode()) {
+  // Unsigned instructions with scaled 12-bit offsets.
+  default:
+    return (Offset >= 0 && Offset <= 4095);
+
+  // Signed instructions with 9-bit offsets.
+  case AArch64::LDURXi:
+  case AArch64::LDURWi:
+  case AArch64::LDURBi:
+  case AArch64::LDURHi:
+  case AArch64::LDURSi:
+  case AArch64::LDURDi:
+  case AArch64::LDURQi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURBBi:
+  case AArch64::LDURSBXi:
+  case AArch64::LDURSBWi:
+  case AArch64::LDURSHXi:
+  case AArch64::LDURSHWi:
+  case AArch64::LDURSWi:
+  case AArch64::STURXi:
+  case AArch64::STURWi:
+  case AArch64::STURBi:
+  case AArch64::STURHi:
+  case AArch64::STURSi:
+  case AArch64::STURDi:
+  case AArch64::STURQi:
+  case AArch64::STURBBi:
+  case AArch64::STURHHi:
+    return (Offset >= -256 && Offset <= 255);
+
+  // Signed instructions with scaled 7-bit offsets.
+  case AArch64::LDPXi:
+  case AArch64::LDPDi:
+  case AArch64::STPXi:
+  case AArch64::STPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STNPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPSi:
+  case AArch64::STPWi:
+  case AArch64::STPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+    return (Offset >= -64 && Offset <= 63);
+  }
+}
+
 // Return true if this instruction simply sets its single destination register
 // to zero. This is equivalent to a register rename of the zero-register.
 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
@@ -1798,6 +1868,14 @@
   return true;
 }
 
+MachineOperand &
+AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+  // The immediate offset is always the last explicit operand of these forms.
+  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
+  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+  return OfsOp;
+}
+
 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
 // scaled.
 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
@@ -4196,3 +4274,187 @@
       {MO_TLS, "aarch64-tls"}};
   return makeArrayRef(TargetFlags);
 }
+
+unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
+                                               size_t Occurrences,
+                                               bool CanBeTailCall) const {
+  unsigned NotOutlinedSize = SequenceSize * Occurrences;
+  unsigned OutlinedSize;
+
+  // Is this candidate something we can outline as a tail call?
+  if (CanBeTailCall) {
+    // If yes, then we just outline the sequence and replace each of its
+    // occurrences with a branch instruction.
+    OutlinedSize = SequenceSize + Occurrences;
+  } else {
+    // If no, then we outline the sequence (SequenceSize), add a return (+1),
+    // and replace each occurrence with a save/restore of LR and a call
+    // (3 * Occurrences).
+    OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+  }
+
+  // Return the number of instructions saved by outlining this sequence.
+  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
+
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+  return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+AArch64GenInstrInfo::MachineOutlinerInstrType
+AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+
+  // Don't outline LOHs.
+  if (FuncInfo->getLOHRelated().count(&MI))
+    return MachineOutlinerInstrType::Illegal;
+
+  // Don't allow debug values to impact outlining type.
+  if (MI.isDebugValue() || MI.isIndirectDebugValue())
+    return MachineOutlinerInstrType::Invisible;
+
+  // Is this a terminator for a basic block?
+  if (MI.isTerminator()) {
+
+    // Is this the end of a function?
+    if (MI.getParent()->succ_empty())
+      return MachineOutlinerInstrType::Legal;
+
+    // It's not, so don't outline it.
+    return MachineOutlinerInstrType::Illegal;
+  }
+
+  // Don't outline position-setting instructions (labels, CFI instructions).
+  if (MI.isPosition())
+    return MachineOutlinerInstrType::Illegal;
+
+  // Make sure none of the operands are un-outlinable.
+  for (const MachineOperand &MOP : MI.operands())
+    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+        MOP.isTargetIndex())
+      return MachineOutlinerInstrType::Illegal;
+
+  // Don't outline anything that uses the link register.
+  if (MI.modifiesRegister(AArch64::LR, &RI) ||
+      MI.readsRegister(AArch64::LR, &RI))
+    return MachineOutlinerInstrType::Illegal;
+
+  // Does this use the stack?
+  if (MI.modifiesRegister(AArch64::SP, &RI) ||
+      MI.readsRegister(AArch64::SP, &RI)) {
+
+    // Is it a memory operation?
+    if (MI.mayLoadOrStore()) {
+      unsigned Base;  // Filled with the base register of MI.
+      int64_t Offset; // Filled with the offset of MI.
+      unsigned Dummy; // Width; unused here.
+
+      // Does it allow us to offset the base register and is the base SP?
+      if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Dummy, &RI) ||
+          Base != AArch64::SP)
+        return MachineOutlinerInstrType::Illegal;
+
+      // Would adding the fixup overflow the instruction's offset field?
+      if (!isEncodableLdStOffset(MI, Offset + 16))
+        return MachineOutlinerInstrType::Illegal;
+
+      // It's fixable, so we can outline it.
+      return MachineOutlinerInstrType::Legal;
+    }
+
+    // It touches SP some other way that we can't fix up, so don't outline it.
+    return MachineOutlinerInstrType::Illegal;
+  }
+
+  return MachineOutlinerInstrType::Legal;
+}
+
+void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+  for (MachineInstr &MI : MBB) {
+    unsigned Base, Width;
+    int64_t Offset;
+
+    // Is this a load or store with an immediate offset and SP as the base?
+    if (!MI.mayLoadOrStore() ||
+        !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
+        Base != AArch64::SP)
+      continue;
+
+    // It is, so we have to fix it up.
+    MachineOperand &StackOffsetOperand =
+        getMemOpBaseRegImmOfsOffsetOperand(MI);
+    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
+    int Scale = getScale(MI.getOpcode());
+    assert(Scale != -1 && "Instruction doesn't have a scale!");
+
+    // We've pushed the return address to the stack, so add 16 to the offset.
+    // This is safe, since we already checked that the fixup wouldn't overflow
+    // the instruction when we decided it was legal to outline.
+    int64_t NewImm = (Offset + 16) / Scale;
+    StackOffsetOperand.setImm(NewImm);
+  }
+}
+
+void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+                                              MachineFunction &MF,
+                                              bool IsTailCall) const {
+  // If this is a tail call outlined function, then there's already a return.
+  if (IsTailCall)
+    return;
+
+  // It's not a tail call, so we have to insert the return ourselves.
+  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
+                          .addReg(AArch64::LR, RegState::Undef);
+  MBB.insert(MBB.end(), ret);
+
+  // Walk over the basic block and fix up all the stack accesses.
+  fixupPostOutline(MBB);
+}
+
+void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+                                              MachineFunction &MF,
+                                              bool IsTailCall) const {
+  // Nothing to do: LR is saved/restored at each call site, and the return is
+  // inserted by insertOutlinerEpilogue.
+}
+
+MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, bool IsTailCall) const {
+
+  // Are we tail calling?
+  if (IsTailCall) {
+    // If yes, then we can just branch to the label.
+    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
+                            .addGlobalAddress(M.getNamedValue(MF.getName())));
+    return It;
+  }
+
+  // We're not tail calling, so we have to save LR before the call and restore
+  // it after.
+  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+                              .addReg(AArch64::SP, RegState::Define)
+                              .addReg(AArch64::LR)
+                              .addReg(AArch64::SP)
+                              .addImm(-16);
+  It = MBB.insert(It, STRXpre);
+  It++;
+
+  // Insert the call.
+  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
+                          .addGlobalAddress(M.getNamedValue(MF.getName())));
+  It++;
+
+  // Restore the link register.
+  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+                               .addReg(AArch64::SP, RegState::Define)
+                               .addReg(AArch64::LR, RegState::Define)
+                               .addReg(AArch64::SP)
+                               .addImm(16);
+  It = MBB.insert(It, LDRXpost);
+
+  return It;
+}
Index: test/CodeGen/AArch64/machine-outliner.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/machine-outliner.ll
@@ -0,0 +1,43 @@
+; RUN: llc -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define void @cat() #0 {
+; CHECK-LABEL: _cat:
+; CHECK: b l_OUTLINED_FUNCTION_0
+; CHECK-NOT: ret
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 0, i32* %1, align 4
+  store i32 1, i32* %2, align 4
+  store i32 2, i32* %3, align 4
+  store i32 3, i32* %4, align 4
+  ret void
+}
+
+define void @dog() #0 {
+; CHECK-LABEL: _dog:
+; CHECK: b l_OUTLINED_FUNCTION_0
+; CHECK-NOT: ret
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 0, i32* %1, align 4
+  store i32 1, i32* %2, align 4
+  store i32 2, i32* %3, align 4
+  store i32 3, i32* %4, align 4
+  ret void
+}
+
+; CHECK-LABEL: l_OUTLINED_FUNCTION_0:
+; CHECK: orr w8, wzr, #0x1
+; CHECK-NEXT: stp w8, wzr, [sp, #8]
+; CHECK-NEXT: orr w8, wzr, #0x2
+; CHECK-NEXT: str w8, [sp, #4]
+; CHECK-NEXT: orr w8, wzr, #0x3
+; CHECK-NEXT: str w8, [sp], #16
+; CHECK-NEXT: ret
+
+attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" "target-cpu"="cyclone" }
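A worked example of the getOutliningBenefit cost model above (the numbers are
illustrative, not taken from this patch): for a candidate with
SequenceSize = 6 and Occurrences = 3, leaving the code inline costs
6 * 3 = 18 instructions. Outlining it without a tail call costs
(6 + 1) + (3 * 3) = 16 instructions -- the outlined body plus a ret, plus a
save/call/restore triple at each call site -- so the benefit is 2 instructions.
Outlining it as a tail call costs 6 + 3 = 9 instructions (the body plus one
branch per occurrence), a benefit of 9.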
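For reference, the STRXpre/BL/LDRXpost sequence built by insertOutlinedCall
corresponds to the following assembly (a sketch; the label name follows the
test above and is otherwise arbitrary):

    str  x30, [sp, #-16]!        // STRXpre: push LR, SP -= 16
    bl   l_OUTLINED_FUNCTION_0   // call the outlined body
    ldr  x30, [sp], #16          // LDRXpost: pop LR, SP += 16

The 16-byte push is also why fixupPostOutline adds 16 to every SP-relative
offset in the outlined body: while the outlined function runs, SP is 16 bytes
lower than it was where the sequence originally appeared.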