15
15
// / treated as proper graphs for generic algorithms;
16
16
// / 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
17
17
// / within VPBasicBlocks;
18
- // / 4. The VPlan class holding a candidate for vectorization;
19
- // / 5. The VPlanPrinter class providing a way to print a plan in dot format.
18
+ // / 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
19
+ // / instruction;
20
+ // / 5. The VPlan class holding a candidate for vectorization;
21
+ // / 6. The VPlanPrinter class providing a way to print a plan in dot format.
20
22
// / These are documented in docs/VectorizationPlan.rst.
21
23
//
22
24
// ===----------------------------------------------------------------------===//
23
25
24
26
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25
27
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
28
29
+ #include " VPlanValue.h"
27
30
#include " llvm/ADT/DenseMap.h"
28
31
#include " llvm/ADT/GraphTraits.h"
29
32
#include " llvm/ADT/Optional.h"
39
42
#include < map>
40
43
#include < string>
41
44
45
+ // The (re)use of existing LoopVectorize classes is subject to future VPlan
46
+ // refactoring.
47
+ namespace {
48
+ class LoopVectorizationLegality ;
49
+ class LoopVectorizationCostModel ;
50
+ } // namespace
51
+
42
52
namespace llvm {
43
53
44
54
class BasicBlock ;
@@ -82,6 +92,8 @@ struct VPIteration {
82
92
// / Entries from either map can be retrieved using the getVectorValue and
83
93
// / getScalarValue functions, which assert that the desired value exists.
84
94
struct VectorizerValueMap {
95
+ friend struct VPTransformState ;
96
+
85
97
private:
86
98
// / The unroll factor. Each entry in the vector map contains UF vector values.
87
99
unsigned UF;
@@ -195,14 +207,21 @@ struct VectorizerValueMap {
195
207
}
196
208
};
197
209
210
+ // / This class is used to enable the VPlan to invoke a method of ILV. This is
211
+ // / needed until the method is refactored out of ILV and becomes reusable.
212
+ struct VPCallback {
213
+ virtual ~VPCallback () {}
214
+ virtual Value *getOrCreateVectorValues (Value *V, unsigned Part) = 0;
215
+ };
216
+
198
217
// / VPTransformState holds information passed down when "executing" a VPlan,
199
218
// / needed for generating the output IR.
200
219
struct VPTransformState {
201
220
VPTransformState (unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT,
202
221
IRBuilder<> &Builder, VectorizerValueMap &ValueMap,
203
- InnerLoopVectorizer *ILV)
204
- : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ValueMap(ValueMap ),
205
- ILV (ILV) {}
222
+ InnerLoopVectorizer *ILV, VPCallback &Callback )
223
+ : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
224
+ ValueMap (ValueMap), ILV(ILV), Callback(Callback ) {}
206
225
207
226
// / The chosen Vectorization and Unroll Factors of the loop being vectorized.
208
227
unsigned VF;
@@ -213,6 +232,37 @@ struct VPTransformState {
213
232
// / instructions.
214
233
Optional<VPIteration> Instance;
215
234
235
+ struct DataState {
236
+ // / A type for vectorized values in the new loop. Each value from the
237
+ // / original loop, when vectorized, is represented by UF vector values in
238
+ // / the new unrolled loop, where UF is the unroll factor.
239
+ typedef SmallVector<Value *, 2 > PerPartValuesTy;
240
+
241
+ DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
242
+ } Data;
243
+
244
+ // / Get the generated Value for a given VPValue and a given Part. Note that
245
+ // / as some Defs are still created by ILV and managed in its ValueMap, this
246
+ // / method will delegate the call to ILV in such cases in order to provide
247
+ // / callers a consistent API.
248
+ // / \see set.
249
+ Value *get (VPValue *Def, unsigned Part) {
250
+ // If Values have been set for this Def return the one relevant for \p Part.
251
+ if (Data.PerPartOutput .count (Def))
252
+ return Data.PerPartOutput [Def][Part];
253
+ // Def is managed by ILV: bring the Values from ValueMap.
254
+ return Callback.getOrCreateVectorValues (VPValue2Value[Def], Part);
255
+ }
256
+
257
+ // / Set the generated Value for a given VPValue and a given Part.
258
+ void set (VPValue *Def, Value *V, unsigned Part) {
259
+ if (!Data.PerPartOutput .count (Def)) {
260
+ DataState::PerPartValuesTy Entry (UF);
261
+ Data.PerPartOutput [Def] = Entry;
262
+ }
263
+ Data.PerPartOutput [Def][Part] = V;
264
+ }
265
+
216
266
// / Hold state information used when constructing the CFG of the output IR,
217
267
// / traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
218
268
struct CFGState {
@@ -247,8 +297,14 @@ struct VPTransformState {
247
297
// / Values of the output IR.
248
298
VectorizerValueMap &ValueMap;
249
299
300
+ // / Hold a reference to a mapping between VPValues in VPlan and original
301
+ // / Values they correspond to.
302
+ VPValue2ValueTy VPValue2Value;
303
+
250
304
// / Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
251
305
InnerLoopVectorizer *ILV;
306
+
307
+ VPCallback &Callback;
252
308
};
253
309
254
310
// / VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -454,6 +510,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
454
510
using VPRecipeTy = enum {
455
511
VPBlendSC,
456
512
VPBranchOnMaskSC,
513
+ VPInstructionSC,
457
514
VPInterleaveSC,
458
515
VPPredInstPHISC,
459
516
VPReplicateSC,
@@ -483,6 +540,52 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
483
540
virtual void print (raw_ostream &O, const Twine &Indent) const = 0;
484
541
};
485
542
543
+ // / This is a concrete Recipe that models a single VPlan-level instruction.
544
+ // / While as any Recipe it may generate a sequence of IR instructions when
545
+ // / executed, these instructions would always form a single-def expression as
546
+ // / the VPInstruction is also a single def-use vertex.
547
+ class VPInstruction : public VPUser , public VPRecipeBase {
548
+ public:
549
+ // / VPlan opcodes, extending LLVM IR with idiomatics instructions.
550
+ enum { Not = Instruction::OtherOpsEnd + 1 };
551
+
552
+ private:
553
+ typedef unsigned char OpcodeTy;
554
+ OpcodeTy Opcode;
555
+
556
+ // / Utility method serving execute(): generates a single instance of the
557
+ // / modeled instruction.
558
+ void generateInstruction (VPTransformState &State, unsigned Part);
559
+
560
+ public:
561
+ VPInstruction (unsigned Opcode, std::initializer_list<VPValue *> Operands)
562
+ : VPUser(VPValue::VPInstructionSC, Operands),
563
+ VPRecipeBase (VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}
564
+
565
+ // / Method to support type inquiry through isa, cast, and dyn_cast.
566
+ static inline bool classof (const VPValue *V) {
567
+ return V->getVPValueID () == VPValue::VPInstructionSC;
568
+ }
569
+
570
+ // / Method to support type inquiry through isa, cast, and dyn_cast.
571
+ static inline bool classof (const VPRecipeBase *R) {
572
+ return R->getVPRecipeID () == VPRecipeBase::VPInstructionSC;
573
+ }
574
+
575
+ unsigned getOpcode () const { return Opcode; }
576
+
577
+ // / Generate the instruction.
578
+ // / TODO: We currently execute only per-part unless a specific instance is
579
+ // / provided.
580
+ void execute (VPTransformState &State) override ;
581
+
582
+ // / Print the Recipe.
583
+ void print (raw_ostream &O, const Twine &Indent) const override ;
584
+
585
+ // / Print the VPInstruction.
586
+ void print (raw_ostream &O) const ;
587
+ };
588
+
486
589
// / VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
487
590
// / holds a sequence of zero or more VPRecipe's each representing a sequence of
488
591
// / output IR instructions.
@@ -539,15 +642,17 @@ class VPBasicBlock : public VPBlockBase {
539
642
return V->getVPBlockID () == VPBlockBase::VPBasicBlockSC;
540
643
}
541
644
542
- // / Augment the existing recipes of a VPBasicBlock with an additional
543
- // / \p Recipe as the last recipe.
544
- void appendRecipe (VPRecipeBase *Recipe) {
645
+ void insert (VPRecipeBase *Recipe, iterator InsertPt) {
545
646
assert (Recipe && " No recipe to append." );
546
647
assert (!Recipe->Parent && " Recipe already in VPlan" );
547
648
Recipe->Parent = this ;
548
- return Recipes.push_back ( Recipe);
649
+ Recipes.insert (InsertPt, Recipe);
549
650
}
550
651
652
+ // / Augment the existing recipes of a VPBasicBlock with an additional
653
+ // / \p Recipe as the last recipe.
654
+ void appendRecipe (VPRecipeBase *Recipe) { insert (Recipe, end ()); }
655
+
551
656
// / The method which generates the output IR instructions that correspond to
552
657
// / this VPBasicBlock, thereby "executing" the VPlan.
553
658
void execute (struct VPTransformState *State) override ;
@@ -620,6 +725,8 @@ class VPRegionBlock : public VPBlockBase {
620
725
// / Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
621
726
// / VPBlock.
622
727
class VPlan {
728
+ friend class VPlanPrinter ;
729
+
623
730
private:
624
731
// / Hold the single entry to the Hierarchical CFG of the VPlan.
625
732
VPBlockBase *Entry;
@@ -630,12 +737,18 @@ class VPlan {
630
737
// / Holds the name of the VPlan, for printing.
631
738
std::string Name;
632
739
740
+ // / Holds a mapping between Values and their corresponding VPValue inside
741
+ // / VPlan.
742
+ Value2VPValueTy Value2VPValue;
743
+
633
744
public:
634
745
/// Create a VPlan whose Hierarchical CFG is entered through \p Entry; the
/// entry defaults to null.
VPlan(VPBlockBase *Entry = nullptr)
    : Entry(Entry) {}
635
746
636
747
/// Release what the plan holds: the CFG reachable from Entry, if there is
/// one, followed by every VPValue stored in Value2VPValue.
~VPlan() {
  if (Entry) {
    VPBlockBase::deleteCFG(Entry);
  }
  for (auto &ValueAndVPValue : Value2VPValue) {
    delete ValueAndVPValue.second;
  }
}
640
753
641
754
// / Generate the IR code for this VPlan.
@@ -654,6 +767,18 @@ class VPlan {
654
767
655
768
/// Replace the plan's name, which is held for printing, with the string
/// form of \p newName.
void setName(const Twine &newName) {
  Name = newName.str();
}
656
769
770
+ void addVPValue (Value *V) {
771
+ assert (V && " Trying to add a null Value to VPlan" );
772
+ assert (!Value2VPValue.count (V) && " Value already exists in VPlan" );
773
+ Value2VPValue[V] = new VPValue ();
774
+ }
775
+
776
+ VPValue *getVPValue (Value *V) {
777
+ assert (V && " Trying to get the VPValue of a null Value" );
778
+ assert (Value2VPValue.count (V) && " Value does not exist in VPlan" );
779
+ return Value2VPValue[V];
780
+ }
781
+
657
782
private:
658
783
// / Add to the given dominator tree the header block and every new basic block
659
784
// / that was created between it and the latch block, inclusive.
0 commit comments