Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -244,7 +244,38 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+  unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
+                               bool CanBeTailCall) const override;
+  llvm::AArch64GenInstrInfo::MachineOutlinerInstrType
+  getOutliningType(MachineInstr &MI) const override;
+  void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+                              MachineFunction &MF,
+                              bool IsTailCall) const override;
+  void insertOutlinerPrologue(MachineBasicBlock &MBB,
+                              MachineFunction &MF,
+                              bool isTailCall) const override;
+  MachineBasicBlock::iterator
+  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator &It,
+                     MachineFunction &MF,
+                     bool IsTailCall) const override;
+
+private:
+  /// \brief Returns an offset that will make a \p MachineInstr \p MI which uses
+  /// SP as an operand valid post-outlining if such an offset exists.
+  ///
+  /// \param MI The machine instruction to fix.
+  /// \returns The new offset to be applied to SP if one exists, -1 otherwise.
+  int getPostOutliningFixup(MachineInstr &MI) const;
+
+  /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+  /// so that they will be valid post-outlining.
+  ///
+  /// \param MBB A \p MachineBasicBlock in an outlined function.
+  void fixupPostOutline(MachineBasicBlock &MBB) const;
+
   void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                              MachineBasicBlock *TBB,
                              ArrayRef<MachineOperand> Cond) const;
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -4207,3 +4208,268 @@
     {MO_TLS, "aarch64-tls"}};
   return makeArrayRef(TargetFlags);
 }
+
+unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
+                                               size_t Occurrences,
+                                               bool CanBeTailCall) const {
+  unsigned NotOutlinedSize = SequenceSize * Occurrences;
+  unsigned OutlinedSize;
+
+  // Is this candidate something we can outline as a tail call?
+  if (CanBeTailCall) {
+    // If yes, then we just outline the sequence and replace each of its
+    // occurrences with a branch instruction.
+    OutlinedSize = SequenceSize + Occurrences;
+  } else {
+    // If no, then we outline the sequence (SequenceSize), add a return (+1),
+    // and replace each occurrence with a save/restore to LR and a call
+    // (3 * Occurrences)
+    OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+  }
+
+  // Return the number of instructions saved by outlining this sequence.
+  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
+
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+  return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+int AArch64InstrInfo::getPostOutliningFixup(MachineInstr &MI) const {
+
+  auto &StackOffsetOperand = MI.getOperand(MI.getNumExplicitOperands() - 1);
+
+  // Don't fixup things if they don't have an offset to fix.
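+  // (Explanatory note, added for review and not part of the original patch:
+  //  each case below returns the old scaled immediate plus 16/Scale -- the
+  //  16 bytes that the outlined call sequence reserves when it spills LR,
+  //  expressed in that instruction's own addressing units. For example, an
+  //  LDRWui (scale 4) whose immediate operand is 2 addresses SP+8 at the
+  //  original site; inside the outlined body it must address SP+24, so the
+  //  fixup returns 2 + 4 = 6.)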
+  if (!StackOffsetOperand.isImm())
+    return -1;
+
+  switch (MI.getOpcode()) {
+
+  // Scale = 1
+  case AArch64::LDURBBi:
+  case AArch64::LDURBi:
+  case AArch64::LDURDi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURHi:
+  case AArch64::LDURQi:
+  case AArch64::LDURSBWi:
+  case AArch64::LDURSBXi:
+  case AArch64::LDURSHWi:
+  case AArch64::LDURSHXi:
+  case AArch64::LDURSWi:
+  case AArch64::LDURSi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+  case AArch64::STURBBi:
+  case AArch64::STURBi:
+  case AArch64::STURDi:
+  case AArch64::STURHHi:
+  case AArch64::STURHi:
+  case AArch64::STURQi:
+  case AArch64::STURSi:
+  case AArch64::STURWi:
+  case AArch64::STURXi:
+  case AArch64::STRWpost:
+  case AArch64::LDRWpost:
+    return StackOffsetOperand.getImm() + 16;
+
+  // Scale = 2
+  case AArch64::LDRHHui:
+  case AArch64::LDRHui:
+  case AArch64::LDRSHWui:
+  case AArch64::LDRSHXui:
+  case AArch64::STRHHui:
+  case AArch64::STRHui:
+    return StackOffsetOperand.getImm() + 8;
+
+  // Scale = 4
+  case AArch64::LDNPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDPSi:
+  case AArch64::LDPWi:
+  case AArch64::LDRSWui:
+  case AArch64::LDRSui:
+  case AArch64::LDRWui:
+  case AArch64::STNPSi:
+  case AArch64::STNPWi:
+  case AArch64::STPSi:
+  case AArch64::STPWi:
+  case AArch64::STRSui:
+  case AArch64::STRWui:
+    return StackOffsetOperand.getImm() + 4;
+
+  // Scale = 8
+  case AArch64::LDNPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDPDi:
+  case AArch64::LDPXi:
+  case AArch64::LDRDui:
+  case AArch64::LDRXui:
+  case AArch64::STNPDi:
+  case AArch64::STNPXi:
+  case AArch64::STPDi:
+  case AArch64::STPXi:
+  case AArch64::STRDui:
+  case AArch64::STRXui:
+    return StackOffsetOperand.getImm() + 2;
+
+  // Scale = 16
+  case AArch64::LDNPQi:
+  case AArch64::LDPQi:
+  case AArch64::LDRQui:
+  case AArch64::STNPQi:
+  case AArch64::STPQi:
+  case AArch64::STRQui:
+    return StackOffsetOperand.getImm() + 1;
+  default:
+    break;
+  }
+
+  return -1;
+}
+
+AArch64GenInstrInfo::MachineOutlinerInstrType
+AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+
+  // Don't outline LOHs.
+  if (FuncInfo->getLOHRelated().count(&MI))
+    return MachineOutlinerInstrType::Illegal;
+
+  // Don't allow debug values to impact outlining type.
+  if (MI.isDebugValue() || MI.isIndirectDebugValue())
+    return MachineOutlinerInstrType::Invisible;
+
+  // Is this a terminator for a basic block?
+  if (MI.isTerminator()) {
+
+    // Does its parent have any successors in its MachineFunction?
+    if (MI.getParent()->succ_empty())
+      return MachineOutlinerInstrType::Legal;
+
+    // It does have successors, so we can't outline it.
+    return MachineOutlinerInstrType::Illegal;
+  }
+
+  // Don't outline positions.
+  if (MI.isPosition())
+    return MachineOutlinerInstrType::Illegal;
+
+  // Make sure none of the operands are un-outlinable.
+  for (const MachineOperand &MOP : MI.operands())
+    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+        MOP.isTargetIndex())
+      return MachineOutlinerInstrType::Illegal;
+
+  // Don't outline anything that uses the link register.
+  if (MI.modifiesRegister(AArch64::LR, &RI) ||
+      MI.readsRegister(AArch64::LR, &RI) ||
+      MI.getDesc().hasImplicitUseOfPhysReg(AArch64::LR) ||
+      MI.getDesc().hasImplicitDefOfPhysReg(AArch64::LR))
+    return MachineOutlinerInstrType::Illegal;
+
+  // Does this use the stack?
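+  // (Explanatory note, added for review and not part of the original patch:
+  //  a non-tail-call outlined body runs after LR has been spilled with a
+  //  16-byte pre-decrement of SP, so SP-relative accesses are only safe to
+  //  outline when getPostOutliningFixup can rewrite their offsets; anything
+  //  it cannot handle is rejected here.)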
+  if (MI.modifiesRegister(AArch64::SP, &RI) ||
+      MI.readsRegister(AArch64::SP, &RI) ||
+      MI.getDesc().hasImplicitUseOfPhysReg(AArch64::SP) ||
+      MI.getDesc().hasImplicitDefOfPhysReg(AArch64::SP)) {
+
+    // Can this instruction be fixed up after we outline it?
+    if (getPostOutliningFixup(MI) == -1)
+      return MachineOutlinerInstrType::Illegal;
+  }
+
+  return MachineOutlinerInstrType::Legal;
+}
+
+void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+  for (MachineInstr &MI : MBB) {
+
+    // Does this instruction use the stack?
+    if ((MI.modifiesRegister(AArch64::SP, &RI) ||
+         MI.readsRegister(AArch64::SP, &RI) ||
+         MI.getDesc().hasImplicitUseOfPhysReg(AArch64::SP) ||
+         MI.getDesc().hasImplicitDefOfPhysReg(AArch64::SP))) {
+
+      // Get the operand that uses the stack.
+      auto &StackOffsetOperand =
+          MI.getOperand(MI.getNumExplicitOperands() - 1);
+
+      // Make sure that the instruction we're outlining can actually be fixed
+      // up.
+      assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
+      int64_t NewOffset = getPostOutliningFixup(MI);
+
+      assert(NewOffset != -1 &&
+             "Unfixable instruction shouldn't make it here!");
+
+      // Fix it up.
+      StackOffsetOperand.setImm(NewOffset);
+    }
+  }
+}
+
+void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+                                              MachineFunction &MF,
+                                              bool IsTailCall) const {
+
+  // If this is a tail call outlined function, then there's already a return.
+  if (IsTailCall)
+    return;
+
+  // It's not a tail call, so we have to insert the return ourselves.
+  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
+                          .addReg(AArch64::LR, RegState::Undef);
+  MBB.insert(MBB.end(), ret);
+
+  // Walk over the basic block and fix up all the stack accesses.
+  fixupPostOutline(MBB);
+}
+
+void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+                                              MachineFunction &MF,
+                                              bool IsTailCall) const {}
+
+MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, bool IsTailCall) const {
+
+  // Are we tail calling?
+  if (IsTailCall) {
+    // If yes, then we can just branch to the label.
+    It = MBB.insert(It,
+                    BuildMI(MF, DebugLoc(), get(AArch64::B))
+                        .addGlobalAddress(M.getNamedValue(MF.getName())));
+    return It;
+  }
+
+  // We're not tail calling, so we have to save LR before the call and restore
+  // it after.
+  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+                              .addReg(AArch64::SP, RegState::Define)
+                              .addReg(AArch64::LR)
+                              .addReg(AArch64::SP)
+                              .addImm(-16);
+  It = MBB.insert(It, STRXpre);
+  It++;
+
+  // Insert the call.
+  It = MBB.insert(It,
+                  BuildMI(MF, DebugLoc(), get(AArch64::BL))
+                      .addGlobalAddress(M.getNamedValue(MF.getName())));
+
+  It++;
+
+  // Restore the link register.
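+  // (Explanatory note, added for review and not part of the original patch:
+  //  this post-indexed load pairs with the STRXpre above -- it reloads LR
+  //  from [SP] and then adds 16 back to SP, leaving the caller's LR and
+  //  stack pointer exactly as they were before the outlined call.)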
+  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+                               .addReg(AArch64::SP, RegState::Define)
+                               .addReg(AArch64::LR)
+                               .addReg(AArch64::SP)
+                               .addImm(16);
+  It = MBB.insert(It, LDRXpost);
+
+  return It;
+}
+
Index: test/CodeGen/AArch64/machine-outliner.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/machine-outliner.ll
@@ -0,0 +1,43 @@
+; RUN: llc -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define void @cat() #0 {
+; CHECK-LABEL: _cat:
+; CHECK: b l_OUTLINED_FUNCTION_0
+; CHECK-NOT: ret
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 0, i32* %1, align 4
+  store i32 1, i32* %2, align 4
+  store i32 2, i32* %3, align 4
+  store i32 3, i32* %4, align 4
+  ret void
+}
+
+define void @dog() #0 {
+; CHECK-LABEL: _dog:
+; CHECK: b l_OUTLINED_FUNCTION_0
+; CHECK-NOT: ret
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 0, i32* %1, align 4
+  store i32 1, i32* %2, align 4
+  store i32 2, i32* %3, align 4
+  store i32 3, i32* %4, align 4
+  ret void
+}
+
+; CHECK-LABEL: l_OUTLINED_FUNCTION_0:
+; CHECK: orr w8, wzr, #0x1
+; CHECK-NEXT: stp w8, wzr, [sp, #8]
+; CHECK-NEXT: orr w8, wzr, #0x2
+; CHECK-NEXT: str w8, [sp, #4]
+; CHECK-NEXT: orr w8, wzr, #0x3
+; CHECK-NEXT: str w8, [sp], #16
+; CHECK-NEXT: ret
+
+
+attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" "target-cpu"="cyclone" }
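
As a quick sanity check on the cost model in getOutliningBenefit, here is a small standalone
sketch. It is not part of the patch; the helper name `benefit` and the instruction counts are
illustrative only, but the arithmetic mirrors the function above: a candidate only pays off
when the duplicated sequence is larger than the branch/call overhead introduced per occurrence.

// Standalone illustration of the getOutliningBenefit cost model (sizes are
// instruction counts). Not part of the patch.
#include <cassert>
#include <cstddef>

static unsigned benefit(size_t SequenceSize, size_t Occurrences,
                        bool CanBeTailCall) {
  unsigned NotOutlinedSize = SequenceSize * Occurrences;
  unsigned OutlinedSize = CanBeTailCall
                              ? SequenceSize + Occurrences
                              : (SequenceSize + 1) + (3 * Occurrences);
  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
}

int main() {
  // A 5-instruction sequence appearing 3 times, outlined as a tail call:
  // 15 instructions shrink to 5 in the outlined body plus 3 branches.
  assert(benefit(5, 3, /*CanBeTailCall=*/true) == 7);
  // The same sequence with a full call (save LR, call, restore LR per
  // occurrence, plus one return) only breaks even: 15 vs. 5 + 1 + 3*3.
  assert(benefit(5, 3, /*CanBeTailCall=*/false) == 0);
  // With a fourth occurrence the full-call version starts paying off.
  assert(benefit(5, 4, /*CanBeTailCall=*/false) == 2);
  return 0;
}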