Skip to content

Commit

Permalink
[ARM] Cortex-A57 scheduling model for ARM backend (AArch32)
Browse files Browse the repository at this point in the history
This patch implements the Cortex-A57 scheduling model.
The main code is in ARMScheduleA57.td, ARMScheduleA57WriteRes.td.
Small changes in .cpp and .h files to support the required scheduling predicates.

Scheduling model implemented according to:
 http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf.

Patch by: Andrew Zhogin (submitted on his behalf, as requested).
Reviewed by: Renato Golin, Diana Picus, Javed Absar, Kristof Beyls.
Differential Revision: https://reviews.llvm.org/D28152

llvm-svn: 304530
  • Loading branch information
javedabsar committed Jun 2, 2017
1 parent fee75f4 commit 4ae7e81
Showing 19 changed files with 2,404 additions and 11 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/TargetSchedule.h
Original file line number Diff line number Diff line change
@@ -55,6 +55,9 @@ class TargetSchedModel {
/// Return the MCSchedClassDesc for this instruction.
const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const;

/// \brief TargetSubtargetInfo getter.
///
/// Returns the STI pointer as-is; no null check is performed here.
const TargetSubtargetInfo *getSubtargetInfo() const { return STI; }

/// \brief TargetInstrInfo getter.
///
/// Returns the TII pointer as-is; no null check is performed here.
const TargetInstrInfo *getInstrInfo() const { return TII; }

21 changes: 15 additions & 6 deletions llvm/lib/Target/ARM/ARM.td
Original file line number Diff line number Diff line change
@@ -205,6 +205,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;

/// Disable +1 predication cost for instructions updating CPSR.
/// Enabled for Cortex-A57.
/// ARMBaseInstrInfo::getPredicationCost and getInstrLatency query this via
/// Subtarget.cheapPredicableCPSRDef(); calls still get the +1 cost.
def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
"CheapPredicableCPSRDef",
"true",
"Disable +1 predication cost for instructions updating CPSR">;

def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
"AvoidMOVsShifterOperand", "true",
"Avoid movs instructions with shifter operand">;
@@ -788,12 +795,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureCRC,
FeatureFPAO]>;

def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO,
FeatureAvoidPartialCPSR,
FeatureCheapPredicableCPSR]>;

def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDivThumb,
83 changes: 78 additions & 5 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
@@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate(
return Found;
}

static bool isCPSRDefined(const MachineInstr *MI) {
for (const auto &MO : MI->operands())
bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
for (const auto &MO : MI.operands())
if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}

bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Offset = MI.getOperand(Op + 1);
return Offset.getReg() != 0;
}

// On Cortex-A57 a load with a negative register offset needs one additional
// cycle and an additional I unit. This predicate detects that form: the AM3
// opcode immediate at operand Op + 2 encodes a subtraction and the offset
// operand at Op + 1 is a real (non-zero) register.
bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
                                             unsigned Op) const {
  const MachineOperand &Offset = MI.getOperand(Op + 1);
  const MachineOperand &Opc = MI.getOperand(Op + 2);
  assert(Opc.isImm());
  assert(Offset.isReg());

  // An added offset never qualifies, whatever the register is.
  if (ARM_AM::getAM3Op(Opc.getImm()) != ARM_AM::sub)
    return false;

  // Register 0 means "no offset register".
  return Offset.getReg() != 0;
}

bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Opc = MI.getOperand(Op + 2);
unsigned OffImm = Opc.getImm();
return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
}

// Load, scaled register offset, not plus LSL2
bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
unsigned Op) const {
const MachineOperand &Opc = MI.getOperand(Op + 2);
unsigned OffImm = Opc.getImm();

bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
return !SimpleScaled;
}

// Minus reg for ldstso addr mode
bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
unsigned Op) const {
unsigned OffImm = MI.getOperand(Op + 2).getImm();
return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
}

// Load, scaled register offset
bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
unsigned Op) const {
unsigned OffImm = MI.getOperand(Op + 2).getImm();
return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
@@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
case ARM::tSUBi3: // SUB (immediate) T1
case ARM::tSUBi8: // SUB (immediate) T2
case ARM::tSUBrr: // SUB (register) T1
return !isCPSRDefined(MI);
return !ARMBaseInstrInfo::isCPSRDefined(*MI);
}
}

@@ -3349,6 +3404,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}

bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
unsigned BaseReg = MI.getOperand(0).getReg();
for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
const auto &Op = MI.getOperand(i);
if (Op.isReg() && Op.getReg() == BaseReg)
return true;
}
return false;
}
// Size of the variable part of an LDM's operand list.
// Declared operand layouts:
//   (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
//   (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
// The result is the actual operand count minus the declared count, plus one;
// presumably the +1 accounts for the declared reglist:$regs slot itself.
unsigned
ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
  const unsigned ActualOps = MI.getNumOperands();
  const unsigned DeclaredOps = MI.getDesc().getNumOperands();
  return ActualOps + 1 - DeclaredOps;
}

int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -4119,7 +4190,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {

const MCInstrDesc &MCID = MI.getDesc();

if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
!Subtarget.cheapPredicableCPSRDef())) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
return 1;
@@ -4148,7 +4220,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}

const MCInstrDesc &MCID = MI.getDesc();
if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
!Subtarget.cheapPredicableCPSRDef()))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
18 changes: 18 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.h
Original file line number Diff line number Diff line change
@@ -159,6 +159,24 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {

bool isPredicable(const MachineInstr &MI) const override;

// CPSR defined in instruction: true if MI has a register operand that is
// ARM::CPSR, is a def, and is not marked dead.
static bool isCPSRDefined(const MachineInstr &MI);
// True if the operand at Op + 1 holds a non-zero register.
// NOTE(review): the name suggests an immediate-offset check, but the
// definition tests for a present offset register -- confirm against callers.
bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
// True if the AM3 opcode immediate at Op + 2 encodes a subtraction and the
// offset operand at Op + 1 is a non-zero register.
bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;

// Load, scaled register offset: the AM2 opcode immediate at Op + 2 encodes
// a shift other than no_shift.
bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
// Load, scaled register offset, not plus LSL2: scaled, and not exactly
// add + lsl with an AM2 offset value of 2. Unscaled offsets return false.
bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
// Minus reg for ldstso addr mode: the AM2 opcode immediate at Op + 2
// encodes a subtraction.
bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
// Scaled register offset in address mode 2 (same test as isLdstScaledReg).
bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
// Load multiple, base reg in list: some operand after operand 0 is a
// register equal to the register in operand 0.
bool isLDMBaseRegInList(const MachineInstr &MI) const;
// get LDM variable defs size: MI's operand count plus one, minus the
// operand count declared in its instruction description.
unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;

/// GetInstSize - Returns the size of the specified MachineInstr.
///
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMSchedule.td
Original file line number Diff line number Diff line change
@@ -147,6 +147,9 @@ def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
(void)TII;
const ARMSubtarget *STI =
static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
(void)STI;
}]>;

def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
@@ -420,3 +423,4 @@ include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
include "ARMScheduleA57.td"
Loading

0 comments on commit 4ae7e81

Please sign in to comment.