Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -486,8 +486,8 @@
   /// non-null. Use \p State to translate given VPValues to IR values in the
   /// vectorized loop.
   void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
-                                  VPValue *Addr,
-                                  VPValue *BlockInMask = nullptr);
+                                  VPValue *Addr, VPValue *StoredValue,
+                                  VPValue *BlockInMask);
 
   /// Set the debug location in the builder using the debug location in
   /// the instruction.
@@ -2348,12 +2348,15 @@
 void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
                                                      VPTransformState &State,
                                                      VPValue *Addr,
+                                                     VPValue *StoredValue,
                                                      VPValue *BlockInMask) {
   // Attempt to issue a wide load.
   LoadInst *LI = dyn_cast<LoadInst>(Instr);
   StoreInst *SI = dyn_cast<StoreInst>(Instr);
 
   assert((LI || SI) && "Invalid Load/Store instruction");
+  assert((!SI || StoredValue) && "No stored value provided for widened store");
+  assert((!LI || !StoredValue) && "Stored value provided for widened load");
 
   LoopVectorizationCostModel::InstWidening Decision =
       Cost->getWideningDecision(Instr, VF);
@@ -2425,7 +2428,7 @@
 
     for (unsigned Part = 0; Part < UF; ++Part) {
       Instruction *NewSI = nullptr;
-      Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
+      Value *StoredVal = State.get(StoredValue, Part);
       if (CreateGatherScatter) {
         Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
         Value *VectorGep = State.get(Addr, Part);
@@ -6807,7 +6810,12 @@
     Mask = createBlockInMask(I->getParent(), Plan);
 
   VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I));
-  return new VPWidenMemoryInstructionRecipe(*I, Addr, Mask);
+  if (LoadInst *Load = dyn_cast<LoadInst>(I))
+    return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask);
+
+  StoreInst *Store = cast<StoreInst>(I);
+  VPValue *StoredValue = Plan->getOrAddVPValue(Store->getValueOperand());
+  return new VPWidenMemoryInstructionRecipe(*Store, Addr, StoredValue, Mask);
 }
 
 VPWidenIntOrFpInductionRecipe *
@@ -7515,7 +7523,9 @@
 }
 
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
+  VPValue *StoredValue = isa<StoreInst>(Instr) ? getStoredValue() : nullptr;
+  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), StoredValue,
+                                        getMask());
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1061,6 +1061,9 @@
 };
 
 /// A Recipe for widening load/store operations.
+/// The recipe uses the following VPValues:
+/// - For load: Address, optional mask
+/// - For store: Address, stored value, optional mask
 /// TODO: We currently execute only per-part unless a specific instance is
 /// provided.
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
@@ -1068,12 +1071,30 @@
   Instruction &Instr;
   VPUser User;
 
+  /// Add \p Mask as the trailing operand; a null \p Mask adds no operand.
+  void setMask(VPValue *Mask) {
+    if (!Mask)
+      return;
+    User.addOperand(Mask);
+  }
+
+  /// Return true if the operand count implies a mask (load: 2, store: 3).
+  bool isMasked() const {
+    return (isa<LoadInst>(Instr) && User.getNumOperands() == 2) ||
+           (isa<StoreInst>(Instr) && User.getNumOperands() == 3);
+  }
+
 public:
-  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
-                                 VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
-    if (Mask)
-      User.addOperand(Mask);
+  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Load), User({Addr}) {
+    setMask(Mask);
+  }
+
+  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                 VPValue *StoredValue, VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Store),
+        User({Addr, StoredValue}) {
+    setMask(Mask);
   }
 
   /// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1089,8 +1110,15 @@
   /// Return the mask used by this recipe. Note that a full mask is represented
   /// by a nullptr.
   VPValue *getMask() const {
-    // Mask is optional and therefore the last, currently 2nd operand.
-    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+    // Mask is optional and therefore the last operand.
+    return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr;
+  }
+
+  /// Return the value stored by this recipe.
+  VPValue *getStoredValue() const {
+    assert(isa<StoreInst>(Instr) &&
+           "Stored value only available for store instructions");
+    return User.getOperand(1); // Stored value is the 2nd, mandatory operand.
   }
 
   /// Generate the wide load/store.
Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -55,10 +55,14 @@
 
       VPRecipeBase *NewRecipe = nullptr;
       // Create VPWidenMemoryInstructionRecipe for loads and stores.
-      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+      if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
         NewRecipe = new VPWidenMemoryInstructionRecipe(
-            *Inst, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+            *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
             nullptr /*Mask*/);
+      else if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+        NewRecipe = new VPWidenMemoryInstructionRecipe(
+            *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+            Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/);
       else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
         InductionDescriptor II = Inductions.lookup(Phi);
         if (II.getKind() == InductionDescriptor::IK_IntInduction ||