diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7347,16 +7347,17 @@ return ILV.getOrCreateScalarValue(V, Instance); } -void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; IG->getInsertPos()->printAsOperand(O, false); O << ", "; - getAddr()->printAsOperand(O); + getAddr()->printAsOperand(O, SlotTracker); VPValue *Mask = getMask(); if (Mask) { O << ", "; - Mask->printAsOperand(O); + Mask->printAsOperand(O, SlotTracker); } O << "\\l\""; for (unsigned i = 0; i < IG->getFactor(); ++i) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -633,7 +633,9 @@ virtual void execute(struct VPTransformState &State) = 0; /// Each recipe prints itself. - virtual void print(raw_ostream &O, const Twine &Indent) const = 0; + void print(raw_ostream &O, const Twine &Indent); + virtual void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const = 0; /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. @@ -719,10 +721,12 @@ void execute(VPTransformState &State) override; /// Print the Recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; /// Print the VPInstruction. void print(raw_ostream &O) const; + void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; /// Return true if this instruction may modify memory. bool mayWriteToMemory() const { @@ -768,7 +772,8 @@ } /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling GEP instructions. @@ -798,7 +803,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling phi nodes of integer and floating-point inductions, @@ -823,7 +829,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for handling all phi nodes except for integer and FP inductions. @@ -844,7 +851,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for vectorizing a phi-node as a sequence of mask-based select @@ -875,7 +883,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// VPInterleaveRecipe is a recipe for transforming an interleave group of load @@ -915,7 +924,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; const InterleaveGroup *getInterleaveGroup() { return IG; } }; @@ -965,7 +975,8 @@ void setAlsoPack(bool Pack) { AlsoPack = Pack; } /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A recipe for generating conditional branches on the bits of a mask. @@ -989,10 +1000,11 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override { O << " +\n" << Indent << "\"BRANCH-ON-MASK "; if (User) - O << *User->getOperand(0); + User->getOperand(0)->print(O, SlotTracker); else O << " All-One"; O << "\\l\""; @@ -1024,7 +1036,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// A Recipe for widening load/store operations. @@ -1064,7 +1077,8 @@ void execute(VPTransformState &State) override; /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override; + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; }; /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It @@ -1352,6 +1366,7 @@ } }; +class VPSlotTracker; /// VPlan models a candidate for vectorization, encoding various decisions take /// to produce efficient output IR, including which branches, basic-blocks and /// output IR instructions to generate, and their cost. VPlan holds a @@ -1359,6 +1374,7 @@ /// VPBlock. class VPlan { friend class VPlanPrinter; + friend class VPSlotTracker; private: /// Hold the single entry to the Hierarchical CFG of the VPlan. @@ -1392,7 +1408,10 @@ SmallVector VPCBVs; public: - VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} + VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { + if (Entry) + Entry->setPlan(this); + } ~VPlan() { if (Entry) @@ -1496,7 +1515,10 @@ unsigned BID = 0; SmallDenseMap BlockID; - VPlanPrinter(raw_ostream &O, const VPlan &P) : OS(O), Plan(P) {} + VPSlotTracker SlotTracker; + + VPlanPrinter(raw_ostream &O, const VPlan &P) + : OS(O), Plan(P), SlotTracker(&P) {} /// Handle indentation. void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -49,13 +49,20 @@ #define DEBUG_TYPE "vplan" raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { - if (const VPInstruction *Instr = dyn_cast(&V)) - Instr->print(OS); - else - V.printAsOperand(OS); + const VPInstruction *Instr = dyn_cast(&V); + VPSlotTracker SlotTracker( + (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); + V.print(OS, SlotTracker); return OS; } +void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { + if (const VPInstruction *Instr = dyn_cast(this)) + Instr->print(OS, SlotTracker); + else + printAsOperand(OS, SlotTracker); +} + // Get the top-most entry block of \p Start. This is the entry block of the // containing VPlan. This function is templated to support both const and non-const blocks template static T *getPlanEntry(T *Start) { @@ -384,14 +391,20 @@ generateInstruction(State, Part); } -void VPInstruction::print(raw_ostream &O, const Twine &Indent) const { +void VPInstruction::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"EMIT "; - print(O); + print(O, SlotTracker); O << "\\l\""; } void VPInstruction::print(raw_ostream &O) const { - printAsOperand(O); + VPSlotTracker SlotTracker(getParent()->getPlan()); + print(O, SlotTracker); +} + +void VPInstruction::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { + printAsOperand(O, SlotTracker); O << " = "; switch (getOpcode()) { @@ -413,7 +426,7 @@ for (const VPValue *Operand : operands()) { O << " "; - Operand->printAsOperand(O); + Operand->printAsOperand(O, SlotTracker); } } @@ -567,10 +580,14 @@ OS << "\\n" << DOT::EscapeString(Plan.getName()); if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) { OS << ", where:"; - if (Plan.BackedgeTakenCount) - OS << "\\n" << *Plan.BackedgeTakenCount << " := BackedgeTakenCount"; + if (Plan.BackedgeTakenCount) { + OS << "\\n"; + Plan.BackedgeTakenCount->print(OS, SlotTracker); + OS << " := BackedgeTakenCount"; + } for (auto Entry : Plan.Value2VPValue) { - OS << "\\n" << *Entry.second; + OS << "\\n"; + Entry.second->print(OS, SlotTracker); OS << DOT::EscapeString(" := "); Entry.first->printAsOperand(OS, false); } @@ -637,25 +654,25 @@ if (Pred) { OS << " +\n" << Indent << " \"BlockPredicate: "; if (const VPInstruction *PredI = dyn_cast(Pred)) { - PredI->printAsOperand(OS); + PredI->printAsOperand(OS, SlotTracker); OS << " (" << DOT::EscapeString(PredI->getParent()->getName()) << ")\\l\""; } else - Pred->printAsOperand(OS); + Pred->printAsOperand(OS, SlotTracker); } for (const VPRecipeBase &Recipe : *BasicBlock) - Recipe.print(OS, Indent); + Recipe.print(OS, Indent, SlotTracker); // Dump the condition bit. const VPValue *CBV = BasicBlock->getCondBit(); if (CBV) { OS << " +\n" << Indent << " \"CondBit: "; if (const VPInstruction *CBI = dyn_cast(CBV)) { - CBI->printAsOperand(OS); + CBI->printAsOperand(OS, SlotTracker); OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\""; } else { - CBV->printAsOperand(OS); + CBV->printAsOperand(OS, SlotTracker); OS << "\""; } } @@ -702,14 +719,15 @@ O << DOT::EscapeString(IngredientString); } -void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN\\l\""; for (auto &Instr : make_range(Begin, End)) O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\""; } -void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, - const Twine &Indent) const { +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-INDUCTION"; if (Trunc) { O << "\\l\""; @@ -719,7 +737,8 @@ O << " " << VPlanIngredient(IV) << "\\l\""; } -void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-GEP "; O << (IsPtrLoopInvariant ? "Inv" : "Var"); size_t IndicesNumber = IsIndexLoopInvariant.size(); @@ -729,11 +748,13 @@ O << " +\n" << Indent << "\" " << VPlanIngredient(GEP) << "\\l\""; } -void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; } -void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"BLEND "; Phi->printAsOperand(O, false); O << " ="; @@ -747,13 +768,14 @@ O << " "; Phi->getIncomingValue(I)->printAsOperand(O, false); O << "/"; - User->getOperand(I)->printAsOperand(O); + User->getOperand(I)->printAsOperand(O, SlotTracker); } } O << "\\l\""; } -void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") << VPlanIngredient(Ingredient); @@ -762,21 +784,22 @@ O << "\\l\""; } -void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) << "\\l\""; } -void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, - const Twine &Indent) const { +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); O << ", "; - getAddr()->printAsOperand(O); + getAddr()->printAsOperand(O, SlotTracker); VPValue *Mask = getMask(); if (Mask) { O << ", "; - Mask->printAsOperand(O); + Mask->printAsOperand(O, SlotTracker); } O << "\\l\""; } @@ -790,6 +813,14 @@ User->setOperand(I, New); } +void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { + unsigned Slot = Tracker.getSlot(this); + if (Slot == unsigned(-1)) + OS << ""; + else + OS << "%vp" << Tracker.getSlot(this); +} + void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, InterleavedAccessInfo &IAI) { @@ -835,3 +866,48 @@ Old2NewTy Old2New; visitRegion(cast(Plan.getEntry()), Old2New, IAI); } + +void VPSlotTracker::assignSlot(const VPValue *V) { + assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!"); + Slots[V] = NextSlot++; +} + +void VPSlotTracker::assignSlots(const VPBlockBase *VPBB) { + if (auto *Region = dyn_cast(VPBB)) + assignSlots(Region); + else + assignSlots(cast(VPBB)); +} + +void VPSlotTracker::assignSlots(const VPRegionBlock *Region) { + ReversePostOrderTraversal RPOT(Region->getEntry()); + for (const VPBlockBase *Block : RPOT) + assignSlots(Block); +} + +void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) { + for (const VPRecipeBase &Recipe : *VPBB) { + if (const auto *VPI = dyn_cast(&Recipe)) + assignSlot(VPI); + } +} + +void VPSlotTracker::assignSlots(const VPlan &Plan) { + + for (const VPValue *V : Plan.VPExternalDefs) + assignSlot(V); + + for (auto &E : Plan.Value2VPValue) + if (!isa(E.second)) + assignSlot(E.second); + + for (const VPValue *V : Plan.VPCBVs) + assignSlot(V); + + if (Plan.BackedgeTakenCount) + assignSlot(Plan.BackedgeTakenCount); + + ReversePostOrderTraversal RPOT(Plan.getEntry()); + for (const VPBlockBase *Block : RPOT) + assignSlots(Block); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -31,6 +31,8 @@ // Forward declarations. class VPUser; +class VPSlotTracker; + // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins // coming from the input IR, instructions which VPlan will generate if executed @@ -85,9 +87,8 @@ /// for any other purpose, as the values may change as LLVM evolves. unsigned getVPValueID() const { return SubclassID; } - void printAsOperand(raw_ostream &OS) const { - OS << "%vp" << (unsigned short)(unsigned long long)this; - } + void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const; + void print(raw_ostream &OS, VPSlotTracker &Tracker) const; unsigned getNumUsers() const { return Users.size(); } void addUser(VPUser &User) { Users.push_back(&User); } @@ -180,6 +181,38 @@ return const_operand_range(op_begin(), op_end()); } }; +class VPlan; +class VPBasicBlock; +class VPRegionBlock; + +/// This class can be used to assign consecutive numbers to all VPValues in a +/// VPlan and allows querying the numbering for printing, similar to the +/// ModuleSlotTracker for IR values. +class VPSlotTracker { +private: + DenseMap Slots; + unsigned NextSlot = 0; + + void assignSlots(const VPBlockBase *VPBB); + void assignSlots(const VPRegionBlock *Region); + void assignSlots(const VPBasicBlock *VPBB); + void assignSlot(const VPValue *V); + + void assignSlots(const VPlan &Plan); + +public: + VPSlotTracker(const VPlan *Plan) { + if (Plan) + assignSlots(*Plan); + } + + unsigned getSlot(const VPValue *V) const { + auto I = Slots.find(V); + if (I == Slots.end()) + return -1; + return I->second; + } +}; } // namespace llvm diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -11,6 +11,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "gtest/gtest.h" +#include namespace llvm { namespace { @@ -176,5 +177,75 @@ } } +TEST(VPBasicBlockTest, print) { + VPInstruction *I1 = new VPInstruction(10, {}); + VPInstruction *I2 = new VPInstruction(1, {I1}); + VPInstruction *I3 = new VPInstruction(2, {I1, I2}); + + VPBasicBlock *VPBB1 = new VPBasicBlock(); + VPBB1->appendRecipe(I1); + VPBB1->appendRecipe(I2); + VPBB1->appendRecipe(I3); + + VPInstruction *I4 = new VPInstruction(4, {I3, I2}); + VPInstruction *I5 = new VPInstruction(5, {I1}); + VPBasicBlock *VPBB2 = new VPBasicBlock(); + VPBB2->appendRecipe(I4); + VPBB2->appendRecipe(I5); + + VPBlockUtils::connectBlocks(VPBB1, VPBB2); + + // Check printing an instruction without associated VPlan. + { + std::string I3Dump; + raw_string_ostream OS(I3Dump); + I3->print(OS); + OS.flush(); + EXPECT_EQ(" = br ", I3Dump); + } + + VPlan Plan; + Plan.setEntry(VPBB1); + std::string FullDump; + raw_string_ostream(FullDump) << Plan; + + EXPECT_EQ(R"(digraph VPlan { +graph [labelloc=t, fontsize=30; label="Vectorization Plan"] +node [shape=rect, fontname=Courier, fontsize=30] +edge [fontname=Courier, fontsize=30] +compound=true + N0 [label = + ":\n" + + "EMIT %vp0 = catchswitch\l" + + "EMIT %vp1 = ret %vp0\l" + + "EMIT %vp2 = br %vp0 %vp1\l" + ] + N0 -> N1 [ label=""] + N1 [label = + ":\n" + + "EMIT %vp3 = indirectbr %vp2 %vp1\l" + + "EMIT %vp4 = invoke %vp0\l" + ] +} +)", + FullDump); + + { + std::string I3Dump; + raw_string_ostream OS(I3Dump); + I3->print(OS); + OS.flush(); + EXPECT_EQ("%vp2 = br %vp0 %vp1", I3Dump); + } + + { + std::string I2Dump; + raw_string_ostream OS(I2Dump); + OS << *I2; + OS.flush(); + EXPECT_EQ("%vp1 = ret %vp0", I2Dump); + } +} + } // namespace } // namespace llvm