diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -642,7 +642,8 @@
 def ProcM3      : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
                                    "Cortex-M3 ARM processors", []>;
-
+def ProcM7      : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7",
+                                   "Cortex-M7 ARM processors", []>;
 
 //===----------------------------------------------------------------------===//
 // ARM Helper classes.
@@ -1160,6 +1161,7 @@
                                                       FeatureHasNoBranchPredictor]>;
 
 def : ProcessorModel<"cortex-m7", CortexM7Model,      [ARMv7em,
+                                                       ProcM7,
                                                        FeatureFPARMv8_D16,
                                                        FeatureUseMISched]>;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -131,6 +131,10 @@
   CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                const ScheduleDAG *DAG) const override;
 
+  ScheduleHazardRecognizer *
+  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                                 const ScheduleDAGMI *DAG) const override;
+
   ScheduleHazardRecognizer *
   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                      const ScheduleDAG *DAG) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -134,6 +134,31 @@
   return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
 }
 
+// Called during:
+// - pre-RA scheduling
+// - post-RA scheduling when FeatureUseMISched is set
+ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
+    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
+
+  // We would like to restrict this hazard recognizer to only
+  // post-RA scheduling; we can tell that we're post-RA because we don't
+  // track VRegLiveness.
+  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
+  //            banks banked on bit 2. Assume that TCMs are in use.
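+  // The 0x4 mask passed below selects the DTCM bank bit (bit 2) described
+  // above: two accesses whose addresses agree in that bit are treated as
+  // hitting the same bank and are kept in separate cycles.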
+  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
+    MHR->AddHazardRecognizer(
+        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
+
+  // Not inserting ARMHazardRecognizerFPMLx because that would change
+  // legacy behavior.
+
+  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
+  return MHR;
+}
+
+// Called during post-RA scheduling when FeatureUseMISched is not set
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -13,10 +13,21 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
 #define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
 
+#include "ARMBaseInstrInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+#include <array>
+#include <initializer_list>
 
 namespace llvm {
 
+class DataLayout;
+class MachineFunction;
+class MachineInstr;
+class ScheduleDAG;
+
 // Hazards related to FP MLx instructions
 class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
   MachineInstr *LastMI = nullptr;
@@ -32,6 +43,27 @@
   void RecedeCycle() override;
 };
 
+// Hazards related to bank conflicts
+class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer {
+  SmallVector<MachineInstr *, 8> Accesses;
+  const MachineFunction &MF;
+  const DataLayout &DL;
+  int64_t DataMask;
+  bool AssumeITCMBankConflict;
+
+public:
+  ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM,
+                                  bool ABC);
+  HazardType getHazardType(SUnit *SU, int Stalls) override;
+  void Reset() override;
+  void EmitInstruction(SUnit *SU) override;
+  void AdvanceCycle() override;
+  void RecedeCycle() override;
+
+private:
+  inline HazardType CheckOffsets(unsigned O0, unsigned O1);
+};
+
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -10,11 +10,19 @@
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
 using namespace llvm;
 
+static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1),
+                                 cl::Hidden);
+static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict",
+                                        cl::init(false), cl::Hidden);
+
 static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
                          const TargetRegisterInfo &TRI) {
   // FIXME: Detect integer instructions properly.
@@ -93,3 +101,168 @@
 void ARMHazardRecognizerFPMLx::RecedeCycle() {
   llvm_unreachable("reverse ARM hazard checking unsupported");
 }
+
+///////// Bank conflicts handled as hazards //////////////
+
+static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
+                          int64_t &Offset) {
+
+  uint64_t TSFlags = MI.getDesc().TSFlags;
+  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+  unsigned IndexMode =
+      (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+
+  // Address mode tells us what we want to know about operands for T2
+  // instructions (but not size). It tells us size (but not about operands)
+  // for T1 instructions.
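+  // On success, BaseOp points at the base-register operand of MI and Offset
+  // holds the immediate offset (taken as 0 for post-indexed forms, where the
+  // offset only applies after the access).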
+  switch (AddrMode) {
+  default:
+    return false;
+  case ARMII::AddrModeT2_i8:
+    // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8,
+    // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8,
+    // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8,
+    // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8,
+    // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8
+    BaseOp = &MI.getOperand(1);
+    Offset = (IndexMode == ARMII::IndexModePost)
+                 ? 0
+                 : (IndexMode == ARMII::IndexModePre ||
+                    IndexMode == ARMII::IndexModeUpd)
+                       ? MI.getOperand(3).getImm()
+                       : MI.getOperand(2).getImm();
+    return true;
+  case ARMII::AddrModeT2_i12:
+    // t2LDRBi12, t2LDRHi12
+    // t2LDRSBi12, t2LDRSHi12
+    // t2LDRi12
+    BaseOp = &MI.getOperand(1);
+    Offset = MI.getOperand(2).getImm();
+    return true;
+  case ARMII::AddrModeT2_i8s4:
+    // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8
+    BaseOp = &MI.getOperand(2);
+    Offset = (IndexMode == ARMII::IndexModePost)
+                 ? 0
+                 : (IndexMode == ARMII::IndexModePre ||
+                    IndexMode == ARMII::IndexModeUpd)
+                       ? MI.getOperand(4).getImm()
+                       : MI.getOperand(3).getImm();
+    return true;
+  case ARMII::AddrModeT1_1:
+    // tLDRBi, tLDRBr (watch out!), TLDRSB
+  case ARMII::AddrModeT1_2:
+    // tLDRHi, tLDRHr (watch out!), TLDRSH
+  case ARMII::AddrModeT1_4:
+    // tLDRi, tLDRr (watch out!)
+    BaseOp = &MI.getOperand(1);
+    Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0;
+    return MI.getOperand(2).isImm();
+  }
+  return false;
+}
+
+ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
+    const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
+    : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+      DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
+                                                : CPUBankMask),
+      AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
+                                 ? AssumeITCMConflict
+                                 : CPUAssumeITCMConflict) {
+  MaxLookAhead = 1;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) {
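+  // Two offsets that differ in a bit selected by DataMask land in different
+  // banks and can be accessed in parallel; otherwise they contend for the
+  // same bank and we report a hazard.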
+  return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  MachineInstr &L0 = *SU->getInstr();
+  if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1)
+    return NoHazard;
+
+  auto MO0 = *L0.memoperands().begin();
+  auto BaseVal0 = MO0->getValue();
+  auto BasePseudoVal0 = MO0->getPseudoValue();
+  int64_t Offset0 = 0;
+
+  if (MO0->getSize() > 4)
+    return NoHazard;
+
+  bool SPvalid = false;
+  const MachineOperand *SP = nullptr;
+  int64_t SPOffset0 = 0;
+
+  for (auto L1 : Accesses) {
+    auto MO1 = *L1->memoperands().begin();
+    auto BaseVal1 = MO1->getValue();
+    auto BasePseudoVal1 = MO1->getPseudoValue();
+    int64_t Offset1 = 0;
+
+    // Pointers to the same object
+    if (BaseVal0 && BaseVal1) {
+      const Value *Ptr0, *Ptr1;
+      Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true);
+      Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true);
+      if (Ptr0 == Ptr1 && Ptr0)
+        return CheckOffsets(Offset0, Offset1);
+    }
+
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) {
+      // Spills/fills
+      auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0);
+      auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1);
+      Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex());
+      Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex());
+      return CheckOffsets(Offset0, Offset1);
+    }
+
+    // Constant pools (likely in ITCM)
+    if (BasePseudoVal0 && BasePseudoVal1 &&
+        BasePseudoVal0->kind() == BasePseudoVal1->kind() &&
+        BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict)
+      return Hazard;
+
+    // Is this a stack pointer-relative access? We could in general try to
+    // use "is this the same register and is it unchanged?", but the
+    // memory operand tracking is highly likely to have already found that.
+    // What we're after here is bank conflicts between different objects in
+    // the stack frame.
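+    // The SP-relative base and offset of L0 are computed lazily, only once
+    // we meet another access that might itself be SP-relative.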
+    if (!SPvalid) { // set up SP
+      if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP)
+        SP = nullptr;
+      SPvalid = true;
+    }
+    if (SP) {
+      int64_t SPOffset1;
+      const MachineOperand *SP1;
+      if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP)
+        return CheckOffsets(SPOffset0, SPOffset1);
+    }
+  }
+
+  return NoHazard;
+}
+
+void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
+  MachineInstr &MI = *SU->getInstr();
+  if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1)
+    return;
+
+  auto MO = *MI.memoperands().begin();
+  uint64_t Size1 = MO->getSize();
+  if (Size1 > 4)
+    return;
+  Accesses.push_back(&MI);
+}
+
+void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
+
+void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -66,6 +66,7 @@
     CortexA8,
     CortexA9,
     CortexM3,
+    CortexM7,
     CortexR4,
     CortexR4F,
     CortexR5,
@@ -616,6 +617,7 @@
   bool isCortexA15() const { return ARMProcFamily == CortexA15; }
   bool isSwift() const { return ARMProcFamily == Swift; }
   bool isCortexM3() const { return ARMProcFamily == CortexM3; }
+  bool isCortexM7() const { return ARMProcFamily == CortexM7; }
   bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
   bool isCortexR5() const { return ARMProcFamily == CortexR5; }
   bool isKrait() const { return ARMProcFamily == Krait; }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -299,6 +299,7 @@
   case CortexR5:
   case CortexR7:
   case CortexM3:
+  case CortexM7:
   case CortexR52:
   case CortexX1:
     break;
diff --git a/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -arm-data-bank-mask=-1 | FileCheck %s --check-prefix=NOBANK
+
+; This tests the Cortex-M7 bank conflict hazard recognizer.
+; Normally both loads would be scheduled early (both in the first cycle) due to
+; their latency, but they bank-conflict in the TCM and so are scheduled in
+; different cycles.
+
+define i32 @test(i32* %x0, i32 %y, i32 %z) {
+; CHECK-LABEL: test:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    ldr r3, [r0]
+; CHECK-NEXT:    subs r1, r3, r1
+; CHECK-NEXT:    ldr r0, [r0, #8]
+; CHECK-NEXT:    subs r1, r1, r2
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+; NOBANK-LABEL: test:
+; NOBANK:       @ %bb.0: @ %entry
+; NOBANK-NEXT:    ldr r3, [r0]
+; NOBANK-NEXT:    ldr r0, [r0, #8]
+; NOBANK-NEXT:    subs r1, r3, r1
+; NOBANK-NEXT:    subs r1, r1, r2
+; NOBANK-NEXT:    adds r1, #1
+; NOBANK-NEXT:    muls r0, r1, r0
+; NOBANK-NEXT:    bx lr
+entry:
+  %0 = load i32, i32* %x0, align 4
+  %mul3 = add nsw i32 %0, 1
+  %mul = sub nsw i32 %mul3, %y
+  %sub = sub nsw i32 %mul, %z
+  %arrayidx1 = getelementptr inbounds i32, i32* %x0, i32 2
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul2 = mul nsw i32 %sub, %1
+  ret i32 %mul2
+}
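+
+; Note: -arm-data-bank-mask (used in the NOBANK run above) and
+; -arm-assume-itcm-bankconflict are the hidden options added in
+; ARMHazardRecognizer.cpp; the former overrides the Cortex-M7 default bank
+; mask of 0x4, the latter treats pairs of constant-pool loads as conflicting.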