Index: include/llvm/CodeGen/ModuloSchedule.h
===================================================================
--- include/llvm/CodeGen/ModuloSchedule.h
+++ include/llvm/CodeGen/ModuloSchedule.h
@@ -254,6 +254,24 @@
   void expand();
 };
 
+/// Expander that simply annotates each scheduled instruction with a post-instr
+/// symbol that can be consumed by the ModuloScheduleTest pass.
+///
+/// The post-instr symbol is a way of annotating an instruction that can be
+/// roundtripped in MIR. The syntax is:
+///   MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
+class ModuloScheduleTestAnnotater {
+  MachineFunction &MF;
+  ModuloSchedule &S;
+
+public:
+  ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
+      : MF(MF), S(S) {}
+
+  /// Performs the annotation.
+  void annotate();
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -288,6 +288,7 @@
 void initializeMetaRenamerPass(PassRegistry&);
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
 void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
+void initializeModuloScheduleTestPass(PassRegistry&);
 void initializeMustExecutePrinterPass(PassRegistry&);
 void initializeMustBeExecutedContextPrinterPass(PassRegistry&);
 void initializeNameAnonGlobalLegacyPassPass(PassRegistry&);
Index: lib/CodeGen/CodeGen.cpp
===================================================================
--- lib/CodeGen/CodeGen.cpp
+++ lib/CodeGen/CodeGen.cpp
@@ -68,6 +68,7 @@
   initializeMachineOptimizationRemarkEmitterPassPass(Registry);
   initializeMachineOutlinerPass(Registry);
   initializeMachinePipelinerPass(Registry);
+  initializeModuloScheduleTestPass(Registry);
   initializeMachinePostDominatorTreePass(Registry);
   initializeMachineRegionInfoPassPass(Registry);
   initializeMachineSchedulerPass(Registry);
Index: lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- lib/CodeGen/MachinePipeliner.cpp
+++ lib/CodeGen/MachinePipeliner.cpp
@@ -154,6 +154,12 @@
 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
                                       cl::init(false));
 
+static cl::opt<bool> EmitTestAnnotations(
+    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
+    cl::desc("Instead of emitting the pipelined code, annotate instructions "
+             "with the generated schedule for feeding into the "
+             "-modulo-schedule-test pass"));
+
 namespace llvm {
 
 // A command line option to enable the CopyToPhi DAG mutation.
@@ -536,6 +542,13 @@
 
   ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
                     std::move(Stages));
+  if (EmitTestAnnotations) {
+    assert(NewInstrChanges.empty() &&
+           "Cannot serialize a schedule with InstrChanges!");
+    ModuloScheduleTestAnnotater MSTI(MF, MS);
+    MSTI.annotate();
+    return;
+  }
   ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
   MSE.expand();
   ++NumPipelined;
Index: lib/CodeGen/ModuloSchedule.cpp
===================================================================
--- lib/CodeGen/ModuloSchedule.cpp
+++ lib/CodeGen/ModuloSchedule.cpp
@@ -7,14 +7,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 #define DEBUG_TYPE "pipeliner"
 using namespace llvm;
 
+//===----------------------------------------------------------------------===//
+// ModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+
 /// Return the register values for the operands of a Phi instruction.
 /// This function assume the instruction is a Phi.
 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
@@ -1196,3 +1203,119 @@
   int LoopStage = Schedule.getStage(Use);
   return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
 }
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestPass implementation
+//===----------------------------------------------------------------------===//
+// This pass constructs a ModuloSchedule from its module and runs
+// ModuloScheduleExpander.
+//
+// The module is expected to contain a single-block analyzable loop.
+// The total order of instructions is taken from the loop as-is.
+// Instructions are expected to be annotated with a PostInstrSymbol.
+// This PostInstrSymbol must have the following format:
+//  "Stage-%d_Cycle-%d".
+//===----------------------------------------------------------------------===//
+
+class ModuloScheduleTest : public MachineFunctionPass {
+public:
+  static char ID;
+
+  ModuloScheduleTest() : MachineFunctionPass(ID) {
+    initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void runOnLoop(MachineFunction &MF, MachineLoop &L);
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<LiveIntervals>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+char ModuloScheduleTest::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
+                      "Modulo Schedule test pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
+                    "Modulo Schedule test pass", false, false)
+
+/// Expands the schedule of the first loop yielded by MachineLoopInfo whose top
+/// and bottom blocks coincide. Returns true if such a loop was rewritten.
+bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  for (auto *L : MLI) {
+    if (L->getTopBlock() != L->getBottomBlock())
+      continue;
+    runOnLoop(MF, *L);
+    return true;
+  }
+  return false;
+}
+
+/// Decodes a post-instr symbol name of the form "Stage-<N>_Cycle-<M>" into
+/// \p Stage and \p Cycle. The name comes from the input MIR, so a malformed
+/// name is reported as a fatal error rather than asserted on.
+static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
+  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
+  std::pair<StringRef, StringRef> StageTokenAndValue =
+      getToken(StageAndCycle.first, "-");
+  std::pair<StringRef, StringRef> CycleTokenAndValue =
+      getToken(StageAndCycle.second, "-");
+  // The value halves still carry their leading '-', hence drop_front();
+  // getAsInteger() returns true when the text is not a valid integer.
+  if (StageTokenAndValue.first != "Stage" ||
+      CycleTokenAndValue.first != "_Cycle" ||
+      StageTokenAndValue.second.empty() ||
+      CycleTokenAndValue.second.empty() ||
+      StageTokenAndValue.second.drop_front().getAsInteger(10, Stage) ||
+      CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle))
+    report_fatal_error(
+        "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
+
+  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
+}
+
+/// Builds a ModuloSchedule for the single-block loop \p L from the post-instr
+/// symbols on its instructions and expands it.
+void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  MachineBasicBlock *BB = L.getTopBlock();
+  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
+
+  DenseMap<MachineInstr *, int> Cycle, Stage;
+  std::vector<MachineInstr *> Instrs;
+  for (MachineInstr &MI : *BB) {
+    if (MI.isTerminator())
+      continue;
+    Instrs.push_back(&MI);
+    if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
+      dbgs() << "Parsing post-instr symbol for " << MI;
+      parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
+    }
+  }
+
+  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
+                    std::move(Stage));
+  ModuloScheduleExpander MSE(
+      MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
+  MSE.expand();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestAnnotater implementation
+//===----------------------------------------------------------------------===//
+
+void ModuloScheduleTestAnnotater::annotate() {
+  for (MachineInstr *MI : S.getInstructions()) {
+    SmallVector<char, 16> SV;
+    raw_svector_ostream OS(SV);
+    OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
+    MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
+    MI->setPostInstrSymbol(MF, Sym);
+  }
+}
Index: test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir
=================================================================== --- /dev/null +++ test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir @@ -0,0 +1,151 @@ +# RUN: llc < %s -x mir -march=hexagon -run-pass=modulo-schedule-test | FileCheck %s + +# Simple check for this sanity test; ensure all instructions are in stage 0 in +# the prolog and stage 3 in the epilog. + +# CHECK-NOT: Stage-3 +# CHECK: J2_loop0r +# CHECK: intregs = S2_addasl_rrri %{{[0-9]+}}, %{{[0-9]+}}, 1, post-instr-symbol +# CHECK: intregs = L2_loadruh_io %{{[0-9]+}}, -4, post-instr-symbol :: (load 2 from %ir.cgep2, !tbaa !0) +# CHECK: intregs = S2_storerh_pi %{{[0-9]+}}, -2, %{{[0-9]+}}, post-instr-symbol :: (store 2 into %ir.lsr.iv, !tbaa !0) +# CHECK: intregs = nsw A2_addi %{{[0-9]+}}, -1, post-instr-symbol +# CHECK: ENDLOOP0 %bb.{{[0-9]+}}, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 +# CHECK-NOT: Stage-0 + +--- | + ; ModuleID = '/google/src/cloud/jmolloy/tc/google3/third_party/llvm/llvm/test/CodeGen/Hexagon/swp-phi-start.ll' + source_filename = "/google/src/cloud/jmolloy/tc/google3/third_party/llvm/llvm/test/CodeGen/Hexagon/swp-phi-start.ll" + target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + + ; Function Attrs: nounwind + define void @f0(i32 %a0, i16* nocapture %a1) #0 { + b0: + br i1 undef, label %b1, label %b2.preheader + + b1: ; preds = %b0 + br i1 undef, label %b3, label %b2.preheader + + b2.preheader: ; preds = %b0, %b1 + %cgep = getelementptr i16, i16* %a1, i32 undef + br label %b2 + + b2: ; preds = %b2.preheader, %b2 + %lsr.iv = phi i16* [ %cgep, %b2.preheader ], [ %cgep3, %b2 ] + %v1 = phi i32 [ %v7, %b2 ], [ undef, %b2.preheader ] + %v2 = phi i32 [ %v1, %b2 ], [ %a0, %b2.preheader ] + %v3 = add nsw i32 %v2, -2 + %cgep2 = getelementptr inbounds i16, i16* %a1, i32 %v3 + %v5 = load i16, i16* %cgep2, align 2, !tbaa !0 + store i16 %v5, i16* %lsr.iv, 
align 2, !tbaa !0 + %v7 = add nsw i32 %v1, -1 + %v8 = icmp sgt i32 %v7, 0 + %cgep3 = getelementptr i16, i16* %lsr.iv, i32 -1 + br i1 %v8, label %b2, label %b3 + + b3: ; preds = %b2, %b1 + ret void + } + + attributes #0 = { nounwind "target-cpu"="hexagonv55" } + + !0 = !{!1, !1, i64 0} + !1 = !{!"short", !2, i64 0} + !2 = !{!"omnipotent char", !3, i64 0} + !3 = !{!"Simple C/C++ TBAA"} + +... +--- +name: f0 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: intregs, preferred-register: '' } + - { id: 1, class: intregs, preferred-register: '' } + - { id: 2, class: intregs, preferred-register: '' } + - { id: 3, class: intregs, preferred-register: '' } + - { id: 4, class: intregs, preferred-register: '' } + - { id: 5, class: intregs, preferred-register: '' } + - { id: 6, class: intregs, preferred-register: '' } + - { id: 7, class: intregs, preferred-register: '' } + - { id: 8, class: predregs, preferred-register: '' } + - { id: 9, class: predregs, preferred-register: '' } + - { id: 10, class: intregs, preferred-register: '' } + - { id: 11, class: intregs, preferred-register: '' } + - { id: 12, class: intregs, preferred-register: '' } + - { id: 13, class: predregs, preferred-register: '' } + - { id: 14, class: intregs, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%6' } + - { reg: '$r1', virtual-reg: '%7' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] 
+machineFunctionInfo: {} +body: | + bb.0.b0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1 + + %7:intregs = COPY $r1 + %6:intregs = COPY $r0 + %8:predregs = IMPLICIT_DEF + J2_jumpt %8, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.b1: + successors: %bb.4(0x40000000), %bb.2(0x40000000) + + %9:predregs = IMPLICIT_DEF + J2_jumpt %9, %bb.4, implicit-def dead $pc + J2_jump %bb.2, implicit-def dead $pc + + bb.2.b2.preheader: + successors: %bb.3(0x80000000) + + %10:intregs = IMPLICIT_DEF + %14:intregs = COPY %10 + J2_loop0r %bb.3, %14, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.3.b2 (address-taken): + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + + %1:intregs = PHI %7, %bb.2, %5, %bb.3, post-instr-symbol + %2:intregs = PHI %10, %bb.2, %4, %bb.3, post-instr-symbol + %3:intregs = PHI %6, %bb.2, %2, %bb.3, post-instr-symbol + %11:intregs = S2_addasl_rrri %7, %3, 1, post-instr-symbol + %12:intregs = L2_loadruh_io %11, -4, post-instr-symbol :: (load 2 from %ir.cgep2, !tbaa !0) + %5:intregs = S2_storerh_pi %1, -2, %12, post-instr-symbol :: (store 2 into %ir.lsr.iv, !tbaa !0) + %4:intregs = nsw A2_addi %2, -1, post-instr-symbol + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.4, implicit-def dead $pc + + bb.4.b3: + PS_jmpret $r31, implicit-def dead $pc + +...