Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -214,6 +214,8 @@
   VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}

   Value *getOrCreateVectorValues(Value *V, unsigned Part) override;
+  Value *getOrCreateScalarValue(Value *V,
+                                const VPIteration &Instance) override;
 };

 /// A builder used to construct the current plan.
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -484,13 +484,14 @@

   /// Try to vectorize the interleaved access group that \p Instr belongs to,
   /// optionally masking the vector operations if \p BlockInMask is non-null.
-  void vectorizeInterleaveGroup(Instruction *Instr,
-                                VectorParts *BlockInMask = nullptr);
+  void vectorizeInterleaveGroup(Instruction *Instr, VPTransformState &State,
+                                VPValue *Addr, VPValue *BlockInMask = nullptr);

   /// Vectorize Load and Store instructions, optionally masking the vector
   /// operations if \p BlockInMask is non-null.
-  void vectorizeMemoryInstruction(Instruction *Instr,
-                                  VectorParts *BlockInMask = nullptr);
+  void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
+                                  VPValue *Addr,
+                                  VPValue *BlockInMask = nullptr);

   /// Set the debug location in the builder using the debug location in
   /// the instruction.
@@ -2161,7 +2162,9 @@
 // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
 // store <12 x i32> %interleaved.vec      ; Write 4 tuples of R,G,B
 void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
-                                                   VectorParts *BlockInMask) {
+                                                   VPTransformState &State,
+                                                   VPValue *Addr,
+                                                   VPValue *BlockInMask) {
   const InterleaveGroup<Instruction> *Group =
       Cost->getInterleavedAccessGroup(Instr);
   assert(Group && "Fail to get an interleaved access group.");
@@ -2171,23 +2174,21 @@
     return;

   const DataLayout &DL = Instr->getModule()->getDataLayout();
-  Value *Ptr = getLoadStorePointerOperand(Instr);

   // Prepare for the vector type of the interleaved load/store.
   Type *ScalarTy = getMemInstValueType(Instr);
   unsigned InterleaveFactor = Group->getFactor();
   Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
-  Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));

   // Prepare for the new pointers.
-  setDebugLocFromInst(Builder, Ptr);
   SmallVector<Value *, 2> NewPtrs;
   unsigned Index = Group->getIndex(Instr);

-  VectorParts Mask;
+  VectorParts Mask(UF);
   bool IsMaskForCondRequired = BlockInMask;
   if (IsMaskForCondRequired) {
-    Mask = *BlockInMask;
+    for (unsigned Part = 0; Part < UF; ++Part)
+      Mask[Part] = State.get(BlockInMask, Part);
     // TODO: extend the masked interleaved-group support to reversed access.
    assert(!Group->isReverse() && "Reversed masked interleave-group "
                                  "not supported.");
@@ -2202,12 +2203,13 @@
   if (Group->isReverse())
     Index += (VF - 1) * Group->getFactor();

-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-    InBounds = gep->isInBounds();
-
   for (unsigned Part = 0; Part < UF; Part++) {
-    Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});
+    Value *NewPtr = State.get(Addr, {Part, 0});
+    setDebugLocFromInst(Builder, NewPtr);
+
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(NewPtr->stripPointerCasts()))
+      InBounds = gep->isInBounds();

     // Notice current instruction could be any index. Need to adjust the address
     // to the member of index 0.
@@ -2225,6 +2227,8 @@
       cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);

     // Cast to the vector pointer type.
+    unsigned AddressSpace = NewPtr->getType()->getPointerAddressSpace();
+    Type *PtrTy = VecTy->getPointerTo(AddressSpace);
     NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
   }

@@ -2348,7 +2352,9 @@
 }

 void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
-                                                     VectorParts *BlockInMask) {
+                                                     VPTransformState &State,
+                                                     VPValue *Addr,
+                                                     VPValue *BlockInMask) {
   // Attempt to issue a wide load.
   LoadInst *LI = dyn_cast<LoadInst>(Instr);
   StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2360,17 +2366,15 @@
   assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
          "CM decision should be taken at this point");
   if (Decision == LoopVectorizationCostModel::CM_Interleave)
-    return vectorizeInterleaveGroup(Instr);
+    return vectorizeInterleaveGroup(Instr, State, Addr, BlockInMask);

   Type *ScalarDataTy = getMemInstValueType(Instr);
   Type *DataTy = VectorType::get(ScalarDataTy, VF);
-  Value *Ptr = getLoadStorePointerOperand(Instr);
   // An alignment of 0 means target abi alignment. We need to use the scalar's
   // target abi alignment in such a case.
   const DataLayout &DL = Instr->getModule()->getDataLayout();
   const Align Alignment =
       DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
-  unsigned AddressSpace = getLoadStoreAddressSpace(Instr);

   // Determine if the pointer operand of the access is either consecutive or
   // reverse consecutive.
@@ -2385,24 +2389,20 @@
   assert((ConsecutiveStride || CreateGatherScatter) &&
          "The instruction should be scalarized");

-  // Handle consecutive loads/stores.
-  if (ConsecutiveStride)
-    Ptr = getOrCreateScalarValue(Ptr, {0, 0});
-
-  VectorParts Mask;
+  VectorParts Mask(UF);
   bool isMaskRequired = BlockInMask;
   if (isMaskRequired)
-    Mask = *BlockInMask;
-
-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(
-          getLoadStorePointerOperand(Instr)->stripPointerCasts()))
-    InBounds = gep->isInBounds();
+    for (unsigned Part = 0; Part < UF; ++Part)
+      Mask[Part] = State.get(BlockInMask, Part);

   const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
     // Calculate the pointer for the specific unroll-part.
     GetElementPtrInst *PartPtr = nullptr;

+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+
     if (Reverse) {
       // If the address is consecutive but reversed, then the
       // wide store needs to start at the last vector element.
@@ -2420,6 +2420,7 @@
       PartPtr->setIsInBounds(InBounds);
     }

+    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
     return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
   };

@@ -2432,7 +2433,7 @@
       Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
       if (CreateGatherScatter) {
         Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-        Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+        Value *VectorGep = State.get(Addr, Part);
         NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
                                             Alignment.value(), MaskPart);
       } else {
@@ -2443,7 +2444,7 @@
           // We don't want to update the value in the map as it might be used in
           // another expression. So don't call resetVectorValue(StoredVal).
         }
-        auto *VecPtr = CreateVecPtr(Part, Ptr);
+        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
         if (isMaskRequired)
           NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
                                             Alignment.value(), Mask[Part]);
@@ -2463,12 +2464,12 @@
     Value *NewLI;
     if (CreateGatherScatter) {
       Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+      Value *VectorGep = State.get(Addr, Part);
       NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
                                          nullptr, "wide.masked.gather");
       addMetadata(NewLI, LI);
     } else {
-      auto *VecPtr = CreateVecPtr(Part, Ptr);
+      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
       if (isMaskRequired)
         NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
                                          UndefValue::get(DataTy),
@@ -6759,7 +6760,11 @@
   if (Legal->isMaskRequired(I))
     Mask = createBlockInMask(I->getParent(), Plan);

-  return new VPWidenMemoryInstructionRecipe(*I, Mask);
+  Value *Addr = getLoadStorePointerOperand(I);
+  assert(Addr && "Expected a load/store at this point");
+
+  return new VPWidenMemoryInstructionRecipe(*I, Plan->getOrAddVPValue(Addr),
+                                            Mask);
 }

 VPWidenIntOrFpInductionRecipe *
@@ -7208,7 +7213,8 @@
   for (auto IG : InterleaveGroups) {
     auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
         RecipeBuilder.getRecipe(IG->getInsertPos()));
-    (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
+    (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask()))
+        ->insertBefore(Recipe);

     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (Instruction *Member = IG->getMember(i)) {
@@ -7282,13 +7288,21 @@
   return ILV.getOrCreateVectorValue(V, Part);
 }

+Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue(
+    Value *V, const VPIteration &Instance) {
+  return ILV.getOrCreateScalarValue(V, Instance);
+}
+
 void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
   O << " +\n"
     << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
   IG->getInsertPos()->printAsOperand(O, false);
-  if (User) {
+  O << ", ";
+  getAddr()->printAsOperand(O);
+  VPValue *Mask = getMask();
+  if (Mask) {
     O << ", ";
-    User->getOperand(0)->printAsOperand(O);
+    Mask->printAsOperand(O);
   }
   O << "\\l\"";
   for (unsigned i = 0; i < IG->getFactor(); ++i)
@@ -7357,15 +7371,8 @@

 void VPInterleaveRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Interleave group being replicated.");
-  if (!User)
-    return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
-
-  // Last (and currently only) operand is a mask.
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
+  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), State, getAddr(),
+                                      getMask());
 }

 void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7452,14 +7459,7 @@
 }

 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *Mask = getMask();
-  if (!Mask)
-    return State.ILV->vectorizeMemoryInstruction(&Instr);
-
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
+  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
 }

 static ScalarEpilogueLowering
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -227,6 +227,8 @@
 struct VPCallback {
   virtual ~VPCallback() {}
   virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
+  virtual Value *getOrCreateScalarValue(Value *V,
+                                        const VPIteration &Instance) = 0;
 };

 /// VPTransformState holds information passed down when "executing" a VPlan,
@@ -269,6 +271,15 @@
     return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
   }

+  /// Get the generated Value for a given VPValue and given Part and Lane. Note
+  /// that as some Defs are still created by ILV and managed in its ValueMap,
+  /// this method will delegate the call to ILV in such cases in order to
+  /// provide callers a consistent API.
+  Value *get(VPValue *Def, const VPIteration &Instance) {
+    // Def is managed by ILV: bring the Values from ValueMap.
+    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+  }
+
   /// Set the generated Value for a given VPValue and a given Part.
   void set(VPValue *Def, Value *V, unsigned Part) {
     if (!Data.PerPartOutput.count(Def)) {
@@ -862,13 +873,14 @@
 class VPInterleaveRecipe : public VPRecipeBase {
 private:
   const InterleaveGroup<Instruction> *IG;
-  std::unique_ptr<VPUser> User;
+  VPUser User;

 public:
-  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
-      : VPRecipeBase(VPInterleaveSC), IG(IG) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
+                     VPValue *Mask)
+      : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
   }
   ~VPInterleaveRecipe() override = default;

@@ -877,6 +889,19 @@
     return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
   }

+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    // Address is the 1st operand.
+    return User.getOperand(0);
+  }
+
+  /// Return the mask used by this recipe. Note that a full mask is represented
+  /// by a nullptr.
+  VPValue *getMask() const {
+    // Mask is the 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+  }
+
   /// Generate the wide load or store, and shuffles.
   void execute(VPTransformState &State) override;

@@ -999,13 +1024,14 @@
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
 private:
   Instruction &Instr;
-  std::unique_ptr<VPUser> User;
+  VPUser User;

 public:
-  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
+                                 VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
   }

   /// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1013,11 +1039,17 @@
     return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
   }

+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    // Address is the 1st operand.
+    return User.getOperand(0);
+  }
+
   /// Return the mask used by this recipe. Note that a full mask is represented
   /// by a nullptr.
-  VPValue *getMask() {
-    // Mask is the last operand.
-    return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
+  VPValue *getMask() const {
+    // Mask is the 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
   }

   /// Generate the wide load/store.
@@ -1414,6 +1446,13 @@
     return Value2VPValue[V];
   }

+  VPValue *getOrAddVPValue(Value *V) {
+    assert(V && "Trying to get or add the VPValue of a null Value");
+    if (!Value2VPValue.count(V))
+      addVPValue(V);
+    return getVPValue(V);
+  }
+
   /// Return the VPLoopInfo analysis for this VPlan.
   VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
   const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
Index: llvm/lib/Transforms/Vectorize/VPlan.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -735,9 +735,12 @@
 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
                                            const Twine &Indent) const {
   O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
-  if (User) {
+  O << ", ";
+  getAddr()->printAsOperand(O);
+  VPValue *Mask = getMask();
+  if (Mask) {
     O << ", ";
-    User->getOperand(0)->printAsOperand(O);
+    Mask->printAsOperand(O);
   }
   O << "\\l\"";
 }
Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -56,7 +56,9 @@
     VPRecipeBase *NewRecipe = nullptr;
     // Create VPWidenMemoryInstructionRecipe for loads and stores.
     if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
-      NewRecipe = new VPWidenMemoryInstructionRecipe(*Inst, nullptr /*Mask*/);
+      NewRecipe = new VPWidenMemoryInstructionRecipe(
+          *Inst, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+          nullptr /*Mask*/);
     else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
       InductionDescriptor II = Inductions->lookup(Phi);
       if (II.getKind() == InductionDescriptor::IK_IntInduction ||
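
The operand convention both recipes now share is worth spelling out: the inlined VPUser always holds the address as operand 0, and the optional mask, when present, as operand 1, so an absent second operand encodes a full (all-true) mask. This is also what removes the old std::unique_ptr<VPUser>, which existed only when a mask was required. Below is a minimal, self-contained sketch of that layout; the Mock* types are hypothetical stand-ins for VPValue/VPUser and the recipes, not LLVM classes.

// Sketch only: MockValue/MockUser/MockMemRecipe are hypothetical stand-ins
// for VPValue/VPUser and the two recipes; they are not LLVM classes.
#include <cassert>
#include <cstdio>
#include <initializer_list>
#include <vector>

struct MockValue { const char *Name; };

// Stand-in for VPUser: an ordered operand list.
class MockUser {
  std::vector<MockValue *> Operands;

public:
  MockUser(std::initializer_list<MockValue *> Ops) : Operands(Ops) {}
  void addOperand(MockValue *V) { Operands.push_back(V); }
  MockValue *getOperand(unsigned I) const { return Operands[I]; }
  unsigned getNumOperands() const {
    return static_cast<unsigned>(Operands.size());
  }
};

// Mirrors the recipes' layout: operand 0 is the mandatory address; operand 1,
// if present, is the mask. A full (all-true) mask is encoded by its absence.
class MockMemRecipe {
  MockUser User;

public:
  MockMemRecipe(MockValue *Addr, MockValue *Mask) : User({Addr}) {
    if (Mask)
      User.addOperand(Mask);
  }
  MockValue *getAddr() const { return User.getOperand(0); }
  MockValue *getMask() const {
    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
  }
};

int main() {
  MockValue Addr{"%addr"}, Mask{"%mask"};
  MockMemRecipe Masked(&Addr, &Mask), Unmasked(&Addr, nullptr);
  assert(Masked.getMask() == &Mask);
  assert(Unmasked.getMask() == nullptr); // nullptr means "full mask"
  std::printf("address operand: %s\n", Masked.getAddr()->Name);
}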
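VPlan::getOrAddVPValue exists so that the recipe builder in LoopVectorize.cpp and the lowering in VPlanTransforms.cpp map the same IR pointer operand to one shared VPValue: the wrapper is created on first lookup and memoized afterwards. A sketch of that get-or-insert idiom follows, under hypothetical stand-in types (IRValue, PlanValue, and MockPlan are illustrative, not LLVM classes).

// Sketch only: IRValue/PlanValue/MockPlan are hypothetical stand-ins for
// llvm::Value/VPValue/VPlan; only the get-or-insert idiom itself is real.
#include <cassert>
#include <memory>
#include <unordered_map>
#include <vector>

struct IRValue {};
struct PlanValue { IRValue *Underlying; };

class MockPlan {
  // Owns the wrappers; the map memoizes IRValue -> PlanValue so every recipe
  // that refers to the same IR operand sees the same wrapper object.
  std::vector<std::unique_ptr<PlanValue>> Storage;
  std::unordered_map<IRValue *, PlanValue *> Value2PlanValue;

public:
  PlanValue *getOrAddPlanValue(IRValue *V) {
    assert(V && "Trying to get or add the PlanValue of a null IRValue");
    auto It = Value2PlanValue.find(V);
    if (It != Value2PlanValue.end())
      return It->second; // already added: reuse the existing wrapper
    Storage.push_back(std::make_unique<PlanValue>(PlanValue{V}));
    return Value2PlanValue[V] = Storage.back().get();
  }
};

int main() {
  MockPlan Plan;
  IRValue Ptr;
  // Repeated queries for the same IR value must yield the same wrapper.
  assert(Plan.getOrAddPlanValue(&Ptr) == Plan.getOrAddPlanValue(&Ptr));
}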
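Similarly, the new scalar VPTransformState::get(Def, Instance) overload does not own per-lane values; as its doc comment says, such Defs are still created by ILV and managed in its ValueMap, so the state delegates through the abstract VPCallback. The toy reduction below shows only the shape of that delegation, with hypothetical types and a plain int standing in for llvm::Value *.

// Sketch only: Iteration/Callback/State/ToyILV are hypothetical stand-ins for
// VPIteration/VPCallback/VPTransformState/InnerLoopVectorizer.
#include <cassert>

struct Iteration { unsigned Part, Lane; };

// The planner implements this interface; the state sees only the abstraction.
struct Callback {
  virtual ~Callback() = default;
  virtual int getOrCreateScalarValue(int Def, Iteration I) = 0;
};

struct State {
  Callback &CB;
  // Per-lane lookups are delegated: the values live on the vectorizer side.
  int get(int Def, Iteration I) { return CB.getOrCreateScalarValue(Def, I); }
};

// Toy "vectorizer": derives the per-lane value on demand, standing in for a
// ValueMap lookup that may also create the value.
struct ToyILV : Callback {
  int getOrCreateScalarValue(int Def, Iteration I) override {
    return Def + 100 * I.Part + I.Lane;
  }
};

int main() {
  ToyILV ILV;
  State S{ILV};
  assert(S.get(7, {2, 0}) == 207); // Part 2, Lane 0 of Def 7
}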