diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -885,14 +885,6 @@ def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; - -//===----------------------------------------------------------------------===// -// ARM schedules. -//===----------------------------------------------------------------------===// -// -include "ARMPredicates.td" -include "ARMSchedule.td" - //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -901,6 +893,13 @@ include "ARMRegisterBanks.td" include "ARMCallingConv.td" +//===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMPredicates.td" +include "ARMSchedule.td" + //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td --- a/llvm/lib/Target/ARM/ARMSchedule.td +++ b/llvm/lib/Target/ARM/ARMSchedule.td @@ -189,6 +189,23 @@ // LDM, base reg in list def IsLDMBaseRegInListPred : MCSchedPredicate; +class IsRegPCPred : MCSchedPredicate>; + +class BranchWriteRes resl, + list rcl, SchedWriteRes wr> : + SchedWriteRes { + let Latency = !add(wr.Latency, lat); + let ResourceCycles = !listconcat(wr.ResourceCycles, rcl); + let NumMicroOps = !add(wr.NumMicroOps, uops); + SchedWriteRes BaseWr = wr; +} + +class CheckBranchForm : + SchedWriteVariant<[ + SchedVar, [br]>, + SchedVar + ]>; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -173,22 +173,28 @@ def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>; +// Check branch forms of ALU ops: +// check reg 0 for ARM_AM::PC +// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB +class A57BranchForm : + BranchWriteRes<2, 1, [A57UnitB], [1], non_br>; + // shift by register, conditional or unconditional // TODO: according to the doc, conditional uses I0/I1, unconditional uses M // Why more complex instruction uses more simple pipeline? // May be an error in doc. def A57WriteALUsi : SchedWriteVariant<[ // lsl #2, lsl #1, or lsr #1. - SchedVar, - SchedVar + SchedVar>]>, + SchedVar>]> ]>; def A57WriteALUsr : SchedWriteVariant<[ - SchedVar, - SchedVar + SchedVar>]>, + SchedVar>]> ]>; def A57WriteALUSsr : SchedWriteVariant<[ - SchedVar, - SchedVar + SchedVar>]>, + SchedVar>]> ]>; def A57ReadALUsr : SchedReadVariant<[ SchedVar, @@ -830,7 +836,6 @@ SchedVar, SchedVar, SchedVar, - SchedVar, SchedVar ]> { let Variadic=1; } @@ -851,7 +856,6 @@ SchedVar, SchedVar, SchedVar, - SchedVar, SchedVar ]> { let Variadic=1; } @@ -879,7 +883,6 @@ SchedVar, SchedVar, SchedVar, - SchedVar, SchedVar ]> { let Variadic=1; } @@ -900,7 +903,6 @@ SchedVar, SchedVar, SchedVar, - SchedVar, SchedVar ]> { let Variadic=1; } @@ -1486,7 +1488,7 @@ // ----------------------------------------------------------------------------- // Common definitions def : WriteRes { let Latency = 0; let NumMicroOps = 0; } -def : SchedAlias; +def : SchedAlias>>; def : SchedAlias; def : SchedAlias; diff --git a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td --- a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td @@ -36,13 +36,16 @@ def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20; let ResourceCycles = [20]; } def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } -def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } -def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; + let ResourceCycles = [1]; } +def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; + let ResourceCycles = [1]; } def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; } def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; } def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; } -def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; + let ResourceCycles = [1]; } def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; let ResourceCycles = [32]; } def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; diff --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s --- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s @@ -874,7 +874,7 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 1 0.50 adc r1, r2, #15 -# CHECK-NEXT: 1 1 0.50 adc pc, r2, #16 +# CHECK-NEXT: 2 3 1.00 adc pc, r2, #16 # CHECK-NEXT: 1 1 0.50 adc r1, r2, #240 # CHECK-NEXT: 1 1 0.50 adc r1, r2, #3840 # CHECK-NEXT: 1 1 0.50 adc r1, r2, #61440 @@ -890,9 +890,9 @@ # CHECK-NEXT: 1 1 0.50 adcseq r1, r2, #3840 # CHECK-NEXT: 1 1 0.50 adceq r1, r2, #3840 # CHECK-NEXT: 1 1 0.50 adc r4, r5, r6 -# CHECK-NEXT: 1 1 0.50 adc pc, r5, r6 +# CHECK-NEXT: 2 3 1.00 adc pc, r5, r6 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsl #1 -# CHECK-NEXT: 1 2 1.00 adc pc, r5, r6, lsl #4 +# CHECK-NEXT: 2 4 1.00 adc pc, r5, r6, lsl #4 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsl #31 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsr #1 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsr #31 @@ -901,7 +901,7 @@ # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, asr #31 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, asr #32 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, ror #1 -# CHECK-NEXT: 1 2 1.00 adc pc, r5, r6, ror #2 +# CHECK-NEXT: 2 4 1.00 adc pc, r5, r6, ror #2 # CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, ror #31 # CHECK-NEXT: 1 2 1.00 adc r6, r7, r8, lsl r9 # CHECK-NEXT: 1 2 1.00 adc r6, r7, r8, lsr r9 @@ -954,10 +954,10 @@ # CHECK-NEXT: 1 1 0.50 adds r7, r8, #-2147483638 # CHECK-NEXT: 1 1 0.50 adds r7, r8, #40, #2 # CHECK-NEXT: 1 1 0.50 adr r2, #3 -# CHECK-NEXT: 1 1 0.50 and pc, pc, #8 +# CHECK-NEXT: 2 3 1.00 and pc, pc, #8 # CHECK-NEXT: 1 1 0.50 sub r2, pc, #3 # CHECK-NEXT: 1 1 0.50 sub r1, pc, #0 -# CHECK-NEXT: 1 1 0.50 sub pc, r2, #8 +# CHECK-NEXT: 2 3 1.00 sub pc, r2, #8 # CHECK-NEXT: 1 1 0.50 sub r1, pc, #301989888 # CHECK-NEXT: 1 1 0.50 adr r1, #301989888 # CHECK-NEXT: 1 1 0.50 and r10, r1, #15 @@ -1005,7 +1005,7 @@ # CHECK-NEXT: 1 2 1.00 bic r6, r7, r8, ror r2 # CHECK-NEXT: 1 2 1.00 bic r10, r1, r6, rrx # CHECK-NEXT: 1 1 0.50 bic r1, r1, #15 -# CHECK-NEXT: 1 1 0.50 bic pc, r1, #15 +# CHECK-NEXT: 2 3 1.00 bic pc, r1, #15 # CHECK-NEXT: 1 1 0.50 bic r10, r10, r1 # CHECK-NEXT: 1 2 1.00 bic r10, r10, r1, lsl #10 # CHECK-NEXT: 1 2 1.00 bic r10, r10, r1, lsr #10 @@ -1102,7 +1102,7 @@ # CHECK-NEXT: 1 1 0.50 eor r7, r8, #-2147483638 # CHECK-NEXT: 1 1 0.50 eor r7, r8, #40, #2 # CHECK-NEXT: 1 1 0.50 eor r4, r5, r6 -# CHECK-NEXT: 1 1 0.50 eor pc, r5, r6 +# CHECK-NEXT: 2 3 1.00 eor pc, r5, r6 # CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsl #5 # CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsr #5 # CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsr #5 @@ -1746,12 +1746,12 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] -# CHECK-NEXT: 8.00 144.50 144.50 53.00 524.00 12.00 - - +# CHECK-NEXT: 16.00 144.50 144.50 53.00 524.00 12.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #15 -# CHECK-NEXT: - 0.50 0.50 - - - - - adc pc, r2, #16 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - adc pc, r2, #16 # CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #240 # CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #3840 # CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #61440 @@ -1767,9 +1767,9 @@ # CHECK-NEXT: - 0.50 0.50 - - - - - adcseq r1, r2, #3840 # CHECK-NEXT: - 0.50 0.50 - - - - - adceq r1, r2, #3840 # CHECK-NEXT: - 0.50 0.50 - - - - - adc r4, r5, r6 -# CHECK-NEXT: - 0.50 0.50 - - - - - adc pc, r5, r6 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - adc pc, r5, r6 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsl #1 -# CHECK-NEXT: - - - - 1.00 - - - adc pc, r5, r6, lsl #4 +# CHECK-NEXT: 1.00 - - - 1.00 - - - adc pc, r5, r6, lsl #4 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsl #31 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsr #1 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsr #31 @@ -1778,7 +1778,7 @@ # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, asr #31 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, asr #32 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, ror #1 -# CHECK-NEXT: - - - - 1.00 - - - adc pc, r5, r6, ror #2 +# CHECK-NEXT: 1.00 - - - 1.00 - - - adc pc, r5, r6, ror #2 # CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, ror #31 # CHECK-NEXT: - - - - 1.00 - - - adc r6, r7, r8, lsl r9 # CHECK-NEXT: - - - - 1.00 - - - adc r6, r7, r8, lsr r9 @@ -1831,10 +1831,10 @@ # CHECK-NEXT: - 0.50 0.50 - - - - - adds r7, r8, #-2147483638 # CHECK-NEXT: - 0.50 0.50 - - - - - adds r7, r8, #40, #2 # CHECK-NEXT: - 0.50 0.50 - - - - - adr r2, #3 -# CHECK-NEXT: - 0.50 0.50 - - - - - and pc, pc, #8 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - and pc, pc, #8 # CHECK-NEXT: - 0.50 0.50 - - - - - sub r2, pc, #3 # CHECK-NEXT: - 0.50 0.50 - - - - - sub r1, pc, #0 -# CHECK-NEXT: - 0.50 0.50 - - - - - sub pc, r2, #8 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - sub pc, r2, #8 # CHECK-NEXT: - 0.50 0.50 - - - - - sub r1, pc, #301989888 # CHECK-NEXT: - 0.50 0.50 - - - - - adr r1, #301989888 # CHECK-NEXT: - 0.50 0.50 - - - - - and r10, r1, #15 @@ -1882,7 +1882,7 @@ # CHECK-NEXT: - - - - 1.00 - - - bic r6, r7, r8, ror r2 # CHECK-NEXT: - - - - 1.00 - - - bic r10, r1, r6, rrx # CHECK-NEXT: - 0.50 0.50 - - - - - bic r1, r1, #15 -# CHECK-NEXT: - 0.50 0.50 - - - - - bic pc, r1, #15 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - bic pc, r1, #15 # CHECK-NEXT: - 0.50 0.50 - - - - - bic r10, r10, r1 # CHECK-NEXT: - - - - 1.00 - - - bic r10, r10, r1, lsl #10 # CHECK-NEXT: - - - - 1.00 - - - bic r10, r10, r1, lsr #10 @@ -1979,7 +1979,7 @@ # CHECK-NEXT: - 0.50 0.50 - - - - - eor r7, r8, #-2147483638 # CHECK-NEXT: - 0.50 0.50 - - - - - eor r7, r8, #40, #2 # CHECK-NEXT: - 0.50 0.50 - - - - - eor r4, r5, r6 -# CHECK-NEXT: - 0.50 0.50 - - - - - eor pc, r5, r6 +# CHECK-NEXT: 1.00 0.50 0.50 - - - - - eor pc, r5, r6 # CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsl #5 # CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsr #5 # CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsr #5 diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -1348,7 +1348,8 @@ #endif private: - bool mutuallyExclusive(Record *PredDef, ArrayRef Term); + bool mutuallyExclusive(Record *PredDef, ArrayRef Preds, + ArrayRef Term); void getIntersectingVariants( const CodeGenSchedRW &SchedRW, unsigned TransIdx, std::vector &IntersectingVariants); @@ -1367,6 +1368,7 @@ // are always checked in the order they are defined in the .td file. Later // conditions implicitly negate any prior condition. bool PredTransitions::mutuallyExclusive(Record *PredDef, + ArrayRef Preds, ArrayRef Term) { for (const PredCheck &PC: Term) { if (PC.Predicate == PredDef) @@ -1377,8 +1379,36 @@ RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants"); if (any_of(Variants, [PredDef](const Record *R) { return R->getValueAsDef("Predicate") == PredDef; - })) + })) { + // To check if PredDef is mutually exclusive with PC we also need to + // check that PC.Predicate is exclusive with all predicates from variant + // we're expanding. Consider following RW sequence with two variants + // (1 & 2), where A, B and C are predicates from corresponding SchedVars: + // + // 1:A/B - 2:C/B + // + // Here C is not mutually exclusive with variant (1), because A doesn't + // exist in variant (2). This means we have possible transitions from A + // to C and from A to B, and fully expanded sequence would look like: + // + // if (A & C) return ...; + // if (A & B) return ...; + // if (B) return ...; + // + // Now let's consider another sequence: + // + // 1:A/B - 2:A/B + // + // Here A in variant (2) is mutually exclusive with variant (1), because + // A also exists in (2). This means A->B transition is impossible and + // expanded sequence would look like: + // + // if (A) return ...; + // if (B) return ...; + if (!count(Preds, PC.Predicate)) + continue; return true; + } } return false; } @@ -1422,6 +1452,15 @@ return false; } +static std::vector getAllPredicates(ArrayRef Variants) { + std::vector Preds; + for (auto &Variant : Variants) { + assert(Variant.VarOrSeqDef->isSubClassOf("SchedVar")); + Preds.push_back(Variant.VarOrSeqDef->getValueAsDef("Predicate")); + } + return Preds; +} + // Populate IntersectingVariants with any variants or aliased sequences of the // given SchedRW whose processor indices and predicates are not mutually // exclusive with the given transition. @@ -1468,6 +1507,7 @@ if (AliasProcIdx == 0) GenericRW = true; } + std::vector AllPreds = getAllPredicates(Variants); for (TransVariant &Variant : Variants) { // Don't expand variants if the processor models don't intersect. // A zero processor index means any processor. @@ -1486,11 +1526,10 @@ " Ensure only one SchedAlias exists per RW."); } } - if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) { - Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate"); - if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm)) - continue; - } + Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate"); + if (mutuallyExclusive(PredDef, AllPreds, TransVec[TransIdx].PredTerm)) + continue; + if (IntersectingVariants.empty()) { // The first variant builds on the existing transition. Variant.TransVecIdx = TransIdx; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1446,20 +1446,20 @@ OS << Buffer; } +static bool isTruePredicate(const Record *Rec) { + return Rec->isSubClassOf("MCSchedPredicate") && + Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue"); +} + static void emitPredicates(const CodeGenSchedTransition &T, const CodeGenSchedClass &SC, PredicateExpander &PE, raw_ostream &OS) { std::string Buffer; raw_string_ostream SS(Buffer); - auto IsTruePredicate = [](const Record *Rec) { - return Rec->isSubClassOf("MCSchedPredicate") && - Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue"); - }; - // If not all predicates are MCTrue, then we need an if-stmt. unsigned NumNonTruePreds = - T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate); + T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate); SS.indent(PE.getIndentLevel() * 2); @@ -1471,7 +1471,7 @@ for (const Record *Rec : T.PredTerm) { // Skip predicates that evaluate to "true". - if (IsTruePredicate(Rec)) + if (isTruePredicate(Rec)) continue; if (FirstNonTruePredicate) { @@ -1559,6 +1559,11 @@ } } +static bool isAlwaysTrue(const CodeGenSchedTransition &T) { + return llvm::all_of(T.PredTerm, + [](const Record *R) { return isTruePredicate(R); }); +} + void SubtargetEmitter::emitSchedModelHelpersImpl( raw_ostream &OS, bool OnlyExpandMCInstPredicates) { IdxVec VariantClasses; @@ -1601,6 +1606,7 @@ } // Now emit transitions associated with processor PI. + const CodeGenSchedTransition *FinalT = nullptr; for (const CodeGenSchedTransition &T : SC.Transitions) { if (PI != 0 && !count(T.ProcIndices, PI)) continue; @@ -1615,9 +1621,17 @@ if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T)) continue; + // If transition is folded to 'return X' it should be the last one. + if (isAlwaysTrue(T)) { + FinalT = &T; + continue; + } PE.setIndentLevel(3); emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS); } + if (FinalT) + emitPredicates(*FinalT, SchedModels.getSchedClass(FinalT->ToClassIdx), + PE, OS); OS << " }\n";