Index: llvm/test/TableGen/sched-aliases.td =================================================================== --- /dev/null +++ llvm/test/TableGen/sched-aliases.td @@ -0,0 +1,50 @@ +// REQUIRES: aarch64-registered-target +// RUN: llvm-tblgen -gen-instr-info %s -I%p/../../include -I%p/../../lib/Target/AArch64 -o %t -debug-only=subtarget-emitter 2>&1 | FileCheck %s + +// Check that we've defined scheduling classes for FMOVv2f32_ns and FMOVv2f64_ns for Model0 +// CHECK: InstRW: New SC [[SC:[0-9]+]]:FMOVv2f32_ns on Model0 +// CHECK: InstRW: New SC [[SC2:[0-9]+]]:FMOVv2f64_ns on Model0 + +// Generic transition for WriteV should be defined for Model0/ProcFoo0 as well as for +// all instructions without explicitly defined scheduling classes. +// CHECK: Adding transition from WriteV({{[0-9]+}}) to Model0WriteV_4cyc({{[0-9]+}}) on processor indices +// CHECK: Adding transition from WriteV({{[0-9]+}}) to Model0WriteV_2cyc({{[0-9]+}}) on processor indices + +// Transition from FMOVv2f64_ns should still be added for Model0/ProcFoo0, +// even though we've defined a custom scheduling class. +// CHECK: Adding transition from FMOVv2f64_ns([[SC2]]) to Model0WriteV_4cyc({{[0-9]+}}) on processor indices +// CHECK-NEXT: Adding transition from FMOVv2f64_ns([[SC2]]) to Model0WriteV_2cyc({{[0-9]+}}) on processor indices + +// Transition from FMOVv2f32_ns should not be added for Model0/ProcFoo0, +// because a custom sched class is defined for it and it is not a variant. 
+// CHECK-NOT: Adding transition from FMOVv2f32_ns([[SC]]) + +include "AArch64.td" + +def Model0 : SchedMachineModel { + let CompleteModel = 0; +} + +def Model0UnitV : ProcResource<1> { let BufferSize = 0; } + +let SchedModel = Model0 in { + +def Model0WriteV_4cyc : SchedWriteRes<[Model0UnitV]> { let Latency = 4; } +def Model0WriteV_2cyc : SchedWriteRes<[Model0UnitV]> { let Latency = 2; } +def Model0WriteV_1cyc : SchedWriteRes<[Model0UnitV]> { let Latency = 1; } + +def Model0QFormPred : MCSchedPredicate; +def Model0WriteV : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def : SchedAlias; + +def : InstRW<[Model0WriteV_1cyc], (instrs FMOVv2f32_ns)>; +def : InstRW<[WriteV], (instrs FMOVv2f64_ns)>; +} + +def ProcFoo0 : SubtargetFeature<"foo-0", "ARMProcFamily", "foo-0", + "Test Processor #1", []>; + +def : ProcessorModel<"foo-0-model", Model0, [ProcFoo0]>; Index: llvm/test/tools/llvm-mca/ARM/A57-sxtb.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/ARM/A57-sxtb.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=armv7 -mcpu=cortex-a57 -instruction-tables < %s | FileCheck %s + + .text + sxtab16ge r0, r1, r4 + sxtab16 r6, r2, r7 + sxtab16 r3, r5, r8, ror #8 + sxtab16 r3, r2, r1, ror #16 + sxtab16eq r1, r2, r3, ror #24 + sxtbge r2, r4 + sxtb r5, r6 + sxtb r6, r9, ror #8 + sxtblo r5, r1, ror #16 + sxtb r8, r3, ror #24 + sxtb16 r1, r4 + sxtb16 r6, r7 + sxtb16hs r3, r5, ror #8 + sxtb16 r3, r1, ror #16 + sxtb16ge r2, r3, ror #24 + sxthne r3, r9 + sxth r1, r6 + sxth r3, r8, ror #8 + sxthle r2, r2, ror #16 + sxth r9, r3, ror #24 + uxtab16ge r0, r1, r4 + uxtab16 r6, r2, r7 + uxtab16 r3, r5, r8, ror #8 + uxtab16 r3, r2, r1, ror #16 + uxtab16eq r1, r2, r3, ror #24 + uxtbge r2, r4 + uxtb r5, r6 + uxtb r6, r9, ror #8 + uxtblo r5, r1, ror #16 + uxtb r8, r3, ror #24 + uxtb16 r1, r4 + uxtb16 r6, r7 + uxtb16hs r3, r5, ror #8 + uxtb16 r3, r1, ror #16 + 
uxtb16ge r2, r3, ror #24 + uxthne r3, r9 + uxth r1, r6 + uxth r3, r8, ror #8 + uxthle r2, r2, ror #16 + uxth r9, r3, ror #24 + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 1.00 sxtab16ge r0, r1, r4 +# CHECK-NEXT: 1 4 1.00 sxtab16 r6, r2, r7 +# CHECK-NEXT: 1 4 1.00 sxtab16 r3, r5, r8, ror #8 +# CHECK-NEXT: 1 4 1.00 sxtab16 r3, r2, r1, ror #16 +# CHECK-NEXT: 1 4 1.00 sxtab16eq r1, r2, r3, ror #24 +# CHECK-NEXT: 1 1 0.50 sxtbge r2, r4 +# CHECK-NEXT: 1 1 0.50 sxtb r5, r6 +# CHECK-NEXT: 1 1 0.50 sxtb r6, r9, ror #8 +# CHECK-NEXT: 1 1 0.50 sxtblo r5, r1, ror #16 +# CHECK-NEXT: 1 1 0.50 sxtb r8, r3, ror #24 +# CHECK-NEXT: 1 2 1.00 sxtb16 r1, r4 +# CHECK-NEXT: 1 2 1.00 sxtb16 r6, r7 +# CHECK-NEXT: 1 2 1.00 sxtb16hs r3, r5, ror #8 +# CHECK-NEXT: 1 2 1.00 sxtb16 r3, r1, ror #16 +# CHECK-NEXT: 1 2 1.00 sxtb16ge r2, r3, ror #24 +# CHECK-NEXT: 1 1 0.50 sxthne r3, r9 +# CHECK-NEXT: 1 1 0.50 sxth r1, r6 +# CHECK-NEXT: 1 1 0.50 sxth r3, r8, ror #8 +# CHECK-NEXT: 1 1 0.50 sxthle r2, r2, ror #16 +# CHECK-NEXT: 1 1 0.50 sxth r9, r3, ror #24 +# CHECK-NEXT: 1 4 1.00 uxtab16ge r0, r1, r4 +# CHECK-NEXT: 1 4 1.00 uxtab16 r6, r2, r7 +# CHECK-NEXT: 1 4 1.00 uxtab16 r3, r5, r8, ror #8 +# CHECK-NEXT: 1 4 1.00 uxtab16 r3, r2, r1, ror #16 +# CHECK-NEXT: 1 4 1.00 uxtab16eq r1, r2, r3, ror #24 +# CHECK-NEXT: 1 1 0.50 uxtbge r2, r4 +# CHECK-NEXT: 1 1 0.50 uxtb r5, r6 +# CHECK-NEXT: 1 1 0.50 uxtb r6, r9, ror #8 +# CHECK-NEXT: 1 1 0.50 uxtblo r5, r1, ror #16 +# CHECK-NEXT: 1 1 0.50 uxtb r8, r3, ror #24 +# CHECK-NEXT: 1 2 1.00 uxtb16 r1, r4 +# CHECK-NEXT: 1 2 1.00 uxtb16 r6, r7 +# CHECK-NEXT: 1 2 1.00 uxtb16hs r3, r5, ror #8 +# CHECK-NEXT: 1 2 1.00 uxtb16 r3, r1, ror #16 +# CHECK-NEXT: 1 2 1.00 uxtb16ge r2, r3, ror #24 +# CHECK-NEXT: 1 1 0.50 uxthne r3, r9 +# CHECK-NEXT: 1 1 0.50 uxth r1, r6 +# CHECK-NEXT: 1 1 0.50 uxth r3, r8, ror #8 +# CHECK-NEXT: 1 1 0.50 uxthle r2, r2, ror #16 +# CHECK-NEXT: 1 1 0.50 uxth r9, r3, ror #24 Index: llvm/utils/TableGen/CodeGenSchedule.h 
=================================================================== --- llvm/utils/TableGen/CodeGenSchedule.h +++ llvm/utils/TableGen/CodeGenSchedule.h @@ -140,6 +140,8 @@ // Instructions should be ignored by this class because they have been split // off to join another inferred class. RecVec InstRWs; + // InstRWs processor indices. Filled in inferFromInstRWs + DenseSet InstRWProcIndices; CodeGenSchedClass(unsigned Index, std::string Name, Record *ItinClassDef) : Index(Index), Name(std::move(Name)), ItinClassDef(ItinClassDef) {} Index: llvm/utils/TableGen/CodeGenSchedule.cpp =================================================================== --- llvm/utils/TableGen/CodeGenSchedule.cpp +++ llvm/utils/TableGen/CodeGenSchedule.cpp @@ -1281,6 +1281,7 @@ findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); unsigned PIdx = getProcModel(Rec->getValueAsDef("SchedModel")).Index; inferFromRW(Writes, Reads, SCIdx, PIdx); // May mutate SchedClasses. + SchedClasses[SCIdx].InstRWProcIndices.insert(PIdx); } } @@ -1639,29 +1640,53 @@ } } +static void addSequences(CodeGenSchedModels &SchedModels, + const SmallVectorImpl> &Seqs, + IdxVec &Result, bool IsRead) { + for (const auto &S : Seqs) + if (!S.empty()) + Result.push_back(SchedModels.findOrInsertRW(S, IsRead)); +} + +static void dumpTransition(const CodeGenSchedModels &SchedModels, + const CodeGenSchedClass &FromSC, + const CodeGenSchedTransition &SCTrans) { + LLVM_DEBUG(dbgs() << "Adding transition from " << FromSC.Name << "(" + << FromSC.Index << ") to " + << SchedModels.getSchedClass(SCTrans.ToClassIdx).Name << "(" + << SCTrans.ToClassIdx << ")" + << " on processor indices: ("; + dumpIdxVec(SCTrans.ProcIndices); dbgs() << ")\n"); +} // Create a new SchedClass for each variant found by inferFromRW. 
Pass static void inferFromTransitions(ArrayRef LastTransitions, unsigned FromClassIdx, CodeGenSchedModels &SchedModels) { // For each PredTransition, create a new CodeGenSchedTransition, which usually // requires creating a new SchedClass. + const CodeGenSchedClass &FromSC = SchedModels.getSchedClass(FromClassIdx); for (ArrayRef::iterator I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) { - IdxVec OperWritesVariant; - transform(I->WriteSequences, std::back_inserter(OperWritesVariant), - [&SchedModels](ArrayRef WS) { - return SchedModels.findOrInsertRW(WS, /*IsRead=*/false); - }); - IdxVec OperReadsVariant; - transform(I->ReadSequences, std::back_inserter(OperReadsVariant), - [&SchedModels](ArrayRef RS) { - return SchedModels.findOrInsertRW(RS, /*IsRead=*/true); - }); + IdxVec OperWritesVariant, OperReadsVariant; + addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false); + addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true); CodeGenSchedTransition SCTrans; - SCTrans.ToClassIdx = - SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant, - OperReadsVariant, I->ProcIndices); SCTrans.ProcIndices.assign(I->ProcIndices.begin(), I->ProcIndices.end()); + + // Remove all processor indices from this sched transition for which + // we also have InstRWs. + SCTrans.ProcIndices.erase( + llvm::remove_if(SCTrans.ProcIndices, + [&FromSC](unsigned PIdx) { + return FromSC.InstRWProcIndices.count(PIdx); + }), + SCTrans.ProcIndices.end()); + if (SCTrans.ProcIndices.empty()) + continue; + SCTrans.ToClassIdx = + SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant, + OperReadsVariant, I->ProcIndices); + dumpTransition(SchedModels, FromSC, SCTrans); // The final PredTerm is unique set of predicates guarding the transition. 
RecVec Preds; transform(I->PredTerm, std::back_inserter(Preds), @@ -1684,7 +1709,6 @@ ArrayRef ProcIndices) { LLVM_DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") "); - // Create a seed transition with an empty PredTerm and the expanded sequences // of SchedWrites for the current SchedClass. std::vector LastTransitions;