diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -531,6 +531,10 @@ /// value into a vector. Value *getOrCreateVectorValue(Value *V, unsigned Part); + void setVectorValue(Value *Scalar, unsigned Part, Value *Vector) { + VectorLoopValueMap.setVectorValue(Scalar, Part, Vector); + } + /// Return a value in the new loop corresponding to \p V from the original /// loop at unroll and vector indices \p Instance. If the value has been /// vectorized but not scalarized, the necessary extractelement instruction @@ -553,8 +557,8 @@ /// non-null. Use \p State to translate given VPValues to IR values in the /// vectorized loop. void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, - VPValue *Addr, VPValue *StoredValue, - VPValue *BlockInMask); + VPValue *Def, VPValue *Addr, + VPValue *StoredValue, VPValue *BlockInMask); /// Set the debug location in the builder using the debug location in /// the instruction. @@ -2503,11 +2507,9 @@ } } -void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, - VPTransformState &State, - VPValue *Addr, - VPValue *StoredValue, - VPValue *BlockInMask) { +void InnerLoopVectorizer::vectorizeMemoryInstruction( + Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr, + VPValue *StoredValue, VPValue *BlockInMask) { // Attempt to issue a wide load. LoadInst *LI = dyn_cast(Instr); StoreInst *SI = dyn_cast(Instr); @@ -2636,7 +2638,8 @@ if (Reverse) NewLI = reverseVector(NewLI); } - VectorLoopValueMap.setVectorValue(Instr, Part, NewLI); + + State.set(Def, Instr, NewLI, Part); } } @@ -7754,6 +7757,16 @@ if (auto Recipe = RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { + // Check if the recipe can be converted to a VPValue. We need the extra + // down-casting step until VPRecipeBase inherits from VPValue. 
+ VPValue *MaybeVPValue = Recipe->toVPValue(); + if (!Instr->getType()->isVoidTy() && MaybeVPValue) { + if (NeedDef.contains(Instr)) + Plan->addOrReplaceVPValue(Instr, MaybeVPValue); + else + Plan->addVPValue(Instr, MaybeVPValue); + } + RecipeBuilder.setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); continue; @@ -7803,7 +7816,14 @@ for (unsigned i = 0; i < IG->getFactor(); ++i) if (Instruction *Member = IG->getMember(i)) { + VPValue *NewVPV = nullptr; + if (!Member->getType()->isVoidTy()) { + NewVPV = new VPValue(Member); + Plan->getVPValue(Member)->replaceAllUsesWith(NewVPV); + } RecipeBuilder.getRecipe(Member)->eraseFromParent(); + if (NewVPV) + Plan->addVPValue(Member, NewVPV); } } @@ -8145,9 +8165,11 @@ } void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { + Instruction *Instr = getUnderlyingInstr(); VPValue *StoredValue = isa(Instr) ? getStoredValue() : nullptr; - State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), StoredValue, - getMask()); + State.ILV->vectorizeMemoryInstruction(Instr, State, + StoredValue ? nullptr : this, getAddr(), + StoredValue, getMask()); } // Determine how to lower the scalar epilogue, which depends on 1) optimising @@ -8193,6 +8215,12 @@ return CM_ScalarEpilogueAllowed; } +void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V, + unsigned Part) { + set(Def, V, Part); + ILV->setVectorValue(IRDef, Part, V); +} + // Process the loop in the VPlan-native vectorization path. This path builds // VPlan upfront in the vectorization pipeline, which allows to apply // VPlan-to-VPlan transformations from the very beginning without modifying the diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -282,6 +282,10 @@ // delegates the call to ILV below. 
if (Data.PerPartOutput.count(Def)) { auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; + if (!VecPart->getType()->isVectorTy()) { + assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); + return VecPart; + } // TODO: Cache created scalar values. return Builder.CreateExtractElement(VecPart, Builder.getInt32(Instance.Lane)); @@ -298,6 +302,7 @@ } Data.PerPartOutput[Def][Part] = V; } + void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part); /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. @@ -684,6 +689,20 @@ /// Returns a pointer to a VPValue, if the recipe inherits from VPValue or /// nullptr otherwise. VPValue *toVPValue(); + const VPValue *toVPValue() const; + + /// Returns the underlying instruction, if the recipe is a VPValue or nullptr + /// otherwise. + Instruction *getUnderlyingInstr() { + if (auto *VPV = toVPValue()) + return cast_or_null(VPV->getUnderlyingValue()); + return nullptr; + } + const Instruction *getUnderlyingInstr() const { + if (auto *VPV = toVPValue()) + return cast_or_null(VPV->getUnderlyingValue()); + return nullptr; + } }; inline bool VPUser::classof(const VPRecipeBase *Recipe) { @@ -725,10 +744,6 @@ void generateInstruction(VPTransformState &State, unsigned Part); protected: - Instruction *getUnderlyingInstr() { - return cast_or_null(getUnderlyingValue()); - } - void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } public: @@ -1207,8 +1222,9 @@ /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. 
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPUser { - Instruction &Instr; +class VPWidenMemoryInstructionRecipe : public VPRecipeBase, + public VPValue, + public VPUser { void setMask(VPValue *Mask) { if (!Mask) @@ -1217,20 +1233,22 @@ } bool isMasked() const { - return (isa(Instr) && getNumOperands() == 2) || - (isa(Instr) && getNumOperands() == 3); + return (isa(getUnderlyingInstr()) && getNumOperands() == 2) || + (isa(getUnderlyingInstr()) && getNumOperands() == 3); } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr}), Instr(Load) { + : VPRecipeBase(VPWidenMemoryInstructionSC), + VPValue(VPValue::VPMemoryInstructionSC, &Load), VPUser({Addr}) { setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), VPUser({Addr, StoredValue}), - Instr(Store) { + : VPRecipeBase(VPWidenMemoryInstructionSC), + VPValue(VPValue::VPMemoryInstructionSC, &Store), + VPUser({Addr, StoredValue}) { setMask(Mask); } @@ -1253,7 +1271,7 @@ /// Return the address accessed by this recipe. VPValue *getStoredValue() const { - assert(isa(Instr) && + assert(isa(getUnderlyingInstr()) && "Stored value only available for store instructions"); return getOperand(1); // Stored value is the 2nd, mandatory operand. } @@ -1619,6 +1637,10 @@ /// VPlan. Value2VPValueTy Value2VPValue; + /// Contains all VPValues that have been allocated by addVPValue directly and + /// need to be freed when the plan's destructor is called. + SmallVector VPValuesToFree; + /// Holds the VPLoopInfo analysis for this VPlan. 
VPLoopInfo VPLInfo; @@ -1634,8 +1656,8 @@ ~VPlan() { if (Entry) VPBlockBase::deleteCFG(Entry); - for (auto &MapEntry : Value2VPValue) - delete MapEntry.second; + for (VPValue *VPV : VPValuesToFree) + delete VPV; if (BackedgeTakenCount) delete BackedgeTakenCount; for (VPValue *Def : VPExternalDefs) @@ -1685,7 +1707,24 @@ void addVPValue(Value *V) { assert(V && "Trying to add a null Value to VPlan"); assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); - Value2VPValue[V] = new VPValue(V); + VPValue *VPV = new VPValue(V); + Value2VPValue[V] = VPV; + VPValuesToFree.push_back(VPV); + } + + void addVPValue(Value *V, VPValue *VPV) { + assert(V && "Trying to add a null Value to VPlan"); + assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); + Value2VPValue[V] = VPV; + } + + void addOrReplaceVPValue(Value *V, VPValue *VPV) { + assert(V && "Trying to add a null Value to VPlan"); + auto I = Value2VPValue.find(V); + if (I == Value2VPValue.end()) + Value2VPValue[V] = VPV; + else + I->second = VPV; } VPValue *getVPValue(Value *V) { @@ -1701,6 +1740,8 @@ return getVPValue(V); } + void removeVPValueFor(Value *V) { Value2VPValue.erase(V); } + /// Return the VPLoopInfo analysis for this VPlan. 
VPLoopInfo &getVPLoopInfo() { return VPLInfo; } const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } @@ -1782,9 +1823,9 @@ }; struct VPlanIngredient { - Value *V; + const Value *V; - VPlanIngredient(Value *V) : V(V) {} + VPlanIngredient(const Value *V) : V(V) {} }; inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -101,6 +101,22 @@ return nullptr; } +VPValue *VPRecipeBase::toVPValue() { + if (auto *V = dyn_cast(this)) + return V; + if (auto *V = dyn_cast(this)) + return V; + return nullptr; +} + +const VPValue *VPRecipeBase::toVPValue() const { + if (auto *V = dyn_cast(this)) + return V; + if (auto *V = dyn_cast(this)) + return V; + return nullptr; +} + // Get the top-most entry block of \p Start. This is the entry block of the // containing VPlan. This function is templated to support both const and non-const blocks template static T *getPlanEntry(T *Start) { @@ -405,14 +421,15 @@ Parent = nullptr; } -VPValue *VPRecipeBase::toVPValue() { - if (auto *V = dyn_cast(this)) - return V; - return nullptr; -} - iplist::iterator VPRecipeBase::eraseFromParent() { assert(getParent() && "Recipe not in any VPBasicBlock"); + // If the recipe is a VPValue and has been added to the containing VPlan, + // remove the mapping. 
+ if (Value *UV = getUnderlyingInstr()) + if (!UV->getType()->isVoidTy()) + if (auto *Plan = getParent()->getPlan()) + Plan->removeVPValueFor(UV); + return getParent()->getRecipeList().erase(getIterator()); } @@ -903,7 +920,8 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN " << Instruction::getOpcodeName(Instr.getOpcode()) << " "; + O << "\"WIDEN " + << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; bool First = true; for (VPValue *Op : operands()) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -43,6 +43,7 @@ friend class VPBasicBlock; friend class VPInterleavedAccessInfo; friend class VPSlotTracker; + friend class VPRecipeBase; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -77,7 +78,7 @@ /// are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPValue objects. They are used for concrete /// type identification. - enum { VPValueSC, VPInstructionSC }; + enum { VPValueSC, VPInstructionSC, VPMemoryInstructionSC }; VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV) {} VPValue(const VPValue &) = delete;